Allow listened conversations to be non-blocking
[akaros.git] / kern / src / net / devip.c
1 // INFERNO
2 #include <vfs.h>
3 #include <kfs.h>
4 #include <slab.h>
5 #include <kmalloc.h>
6 #include <kref.h>
7 #include <string.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include <error.h>
11 #include <cpio.h>
12 #include <pmap.h>
13 #include <smp.h>
14 #include <ip.h>
15
/* Values stored in the "type" field of a qid path, one per file or directory
 * served by this device.  The numbering is sequential within each level and
 * the generators depend on that: entry N of a level is <level base> + N. */
enum {
	/* top level directory and the files directly inside it */
	Qtopdir = 1,
	Qtopbase,
	Qarp = Qtopbase,
	Qbootp,
	Qndb,
	Qiproute,
	Qiprouter,
	Qipselftab,
	Qlog,

	/* directory for a protocol and the files inside it */
	Qprotodir,
	Qprotobase,
	Qclone = Qprotobase,
	Qstats,

	/* directory for a conversation and the files inside it */
	Qconvdir,
	Qconvbase,
	Qctl = Qconvbase,
	Qdata,
	Qerr,
	Qlisten,
	Qlocal,
	Qremote,
	Qstatus,
	Qsnoop,
};

/* Bit layout of a qid path, low bits first: type (Logtype bits), conversation
 * index (Logconv bits), protocol index (Logproto bits). */
enum {
	Logtype = 5,
	Masktype = (1 << Logtype) - 1,
	Logconv = 12,
	Maskconv = (1 << Logconv) - 1,
	Shiftconv = Logtype,
	Logproto = 8,
	Maskproto = (1 << Logproto) - 1,
	Shiftproto = Logtype + Logconv,
};

enum {
	Nfs = 32,	/* number of slots in ipfs[] */
};
54 #define TYPE(x)         ( ((uint32_t)(x).path) & Masktype )
55 #define CONV(x)         ( (((uint32_t)(x).path) >> Shiftconv) & Maskconv )
56 #define PROTO(x)        ( (((uint32_t)(x).path) >> Shiftproto) & Maskproto )
57 #define QID(p, c, y)    ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y))
58 static char network[] = "network";
59
60 qlock_t fslock;
61 struct Fs *ipfs[Nfs];                   /* attached fs's */
62 struct queue *qlog;
63
64 extern void nullmediumlink(void);
65 extern void pktmediumlink(void);
66 extern char *eve;
67 static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
68 static void closeconv(struct conv *);
69
70 static inline int founddevdir(struct chan *c, struct qid q, char *n,
71                                                           int64_t length, char *user, long perm,
72                                                           struct dir *db)
73 {
74         devdir(c, q, n, length, user, perm, db);
75         return 1;
76 }
77
78 static int topdirgen(struct chan *c, struct dir *dp)
79 {
80         struct qid q;
81         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
82         snprintf(get_cur_genbuf(), GENBUF_SZ, "#I%lu", c->dev);
83         return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
84 }
85
86
87 static int ip3gen(struct chan *c, int i, struct dir *dp)
88 {
89         struct qid q;
90         struct conv *cv;
91         char *p;
92
93         cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
94         if (cv->owner == NULL)
95                 kstrdup(&cv->owner, eve);
96         mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
97
98         switch (i) {
99                 default:
100                         return -1;
101                 case Qctl:
102                         return founddevdir(c, q, "ctl", 0,
103                                                    cv->owner, cv->perm, dp);
104                 case Qdata:
105                         return founddevdir(c, q, "data", qlen(cv->rq),
106                                                            cv->owner, cv->perm, dp);
107                 case Qerr:
108                         return founddevdir(c, q, "err", qlen(cv->eq),
109                                                            cv->owner, cv->perm, dp);
110                 case Qlisten:
111                         return founddevdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
112                 case Qlocal:
113                         p = "local";
114                         break;
115                 case Qremote:
116                         p = "remote";
117                         break;
118                 case Qsnoop:
119                         if (strcmp(cv->p->name, "ipifc") != 0)
120                                 return -1;
121                         return founddevdir(c, q, "snoop", qlen(cv->sq),
122                                                            cv->owner, 0400, dp);
123                 case Qstatus:
124                         p = "status";
125                         break;
126         }
127         return founddevdir(c, q, p, 0, cv->owner, 0444, dp);
128 }
129
130 static int ip2gen(struct chan *c, int i, struct dir *dp)
131 {
132         struct qid q;
133         mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
134         switch (i) {
135                 case Qclone:
136                         return founddevdir(c, q, "clone", 0, network, 0666, dp);
137                 case Qstats:
138                         return founddevdir(c, q, "stats", 0, network, 0444, dp);
139         }
140         return -1;
141 }
142
143 static int ip1gen(struct chan *c, int i, struct dir *dp)
144 {
145         struct qid q;
146         char *p;
147         int prot;
148         int len = 0;
149         struct Fs *f;
150         extern uint32_t kerndate;
151
152         f = ipfs[c->dev];
153
154         prot = 0666;
155         mkqid(&q, QID(0, 0, i), 0, QTFILE);
156         switch (i) {
157                 default:
158                         return -1;
159                 case Qarp:
160                         p = "arp";
161                         break;
162                 case Qbootp:
163                         if (bootp == NULL)
164                                 return 0;
165                         p = "bootp";
166                         break;
167                 case Qndb:
168                         p = "ndb";
169                         len = strlen(f->ndb);
170                         q.vers = f->ndbvers;
171                         break;
172                 case Qiproute:
173                         p = "iproute";
174                         break;
175                 case Qipselftab:
176                         p = "ipselftab";
177                         prot = 0444;
178                         break;
179                 case Qiprouter:
180                         p = "iprouter";
181                         break;
182                 case Qlog:
183                         p = "log";
184                         break;
185         }
186         devdir(c, q, p, len, network, prot, dp);
187         if (i == Qndb && f->ndbmtime > kerndate)
188                 dp->mtime = f->ndbmtime;
189         return 1;
190 }
191
192 static int
193 ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d, int unused_int,
194           int s, struct dir *dp)
195 {
196         struct qid q;
197         struct conv *cv;
198         struct Fs *f;
199
200         f = ipfs[c->dev];
201
202         switch (TYPE(c->qid)) {
203                 case Qtopdir:
204                         if (s == DEVDOTDOT)
205                                 return topdirgen(c, dp);
206                         if (s < f->np) {
207                                 if (f->p[s]->connect == NULL)
208                                         return 0;       /* protocol with no user interface */
209                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
210                                 return founddevdir(c, q, f->p[s]->name, 0, network, 0555, dp);
211                         }
212                         s -= f->np;
213                         return ip1gen(c, s + Qtopbase, dp);
214                 case Qarp:
215                 case Qbootp:
216                 case Qndb:
217                 case Qlog:
218                 case Qiproute:
219                 case Qiprouter:
220                 case Qipselftab:
221                         return ip1gen(c, TYPE(c->qid), dp);
222                 case Qprotodir:
223                         if (s == DEVDOTDOT)
224                                 return topdirgen(c, dp);
225                         else if (s < f->p[PROTO(c->qid)]->ac) {
226                                 cv = f->p[PROTO(c->qid)]->conv[s];
227                                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
228                                 mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
229                                 return
230                                         founddevdir(c, q, get_cur_genbuf(), 0, cv->owner, 0555, dp);
231                         }
232                         s -= f->p[PROTO(c->qid)]->ac;
233                         return ip2gen(c, s + Qprotobase, dp);
234                 case Qclone:
235                 case Qstats:
236                         return ip2gen(c, TYPE(c->qid), dp);
237                 case Qconvdir:
238                         if (s == DEVDOTDOT) {
239                                 s = PROTO(c->qid);
240                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
241                                 devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
242                                 return 1;
243                         }
244                         return ip3gen(c, s + Qconvbase, dp);
245                 case Qctl:
246                 case Qdata:
247                 case Qerr:
248                 case Qlisten:
249                 case Qlocal:
250                 case Qremote:
251                 case Qstatus:
252                 case Qsnoop:
253                         return ip3gen(c, TYPE(c->qid), dp);
254         }
255         return -1;
256 }
257
258 static void ipinit(void)
259 {
260         qlock_init(&fslock);
261         nullmediumlink();
262         pktmediumlink();
263 /* if only
264         fmtinstall('i', eipfmt);
265         fmtinstall('I', eipfmt);
266         fmtinstall('E', eipfmt);
267         fmtinstall('V', eipfmt);
268         fmtinstall('M', eipfmt);
269 */
270 }
271
/* Intentionally empty: there is nothing to do here for this device. */
static void ipreset(void)
{
	return;
}
275
276 static struct Fs *ipgetfs(int dev)
277 {
278         extern void (*ipprotoinit[]) (struct Fs *);
279         struct Fs *f;
280         int i;
281
282         if (dev >= Nfs)
283                 return NULL;
284
285         qlock(&fslock);
286         if (ipfs[dev] == NULL) {
287                 f = kzmalloc(sizeof(struct Fs), KMALLOC_WAIT);
288                 rwinit(&f->rwlock);
289                 qlock_init(&f->iprouter.qlock);
290                 ip_init(f);
291                 arpinit(f);
292                 netloginit(f);
293                 for (i = 0; ipprotoinit[i]; i++)
294                         ipprotoinit[i] (f);
295                 f->dev = dev;
296                 ipfs[dev] = f;
297         }
298         qunlock(&fslock);
299
300         return ipfs[dev];
301 }
302
303 struct IPaux *newipaux(char *owner, char *tag)
304 {
305         struct IPaux *a;
306         int n;
307
308         a = kzmalloc(sizeof(*a), 0);
309         kstrdup(&a->owner, owner);
310         memset(a->tag, ' ', sizeof(a->tag));
311         n = strlen(tag);
312         if (n > sizeof(a->tag))
313                 n = sizeof(a->tag);
314         memmove(a->tag, tag, n);
315         return a;
316 }
317
318 #define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)
319
320 static struct chan *ipattach(char *spec)
321 {
322         struct chan *c;
323         int dev;
324
325         dev = atoi(spec);
326         if (dev >= Nfs)
327                 error("bad specification");
328
329         ipgetfs(dev);
330         c = devattach('I', spec);
331         mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
332         c->dev = dev;
333
334         c->aux = newipaux(commonuser(), "none");
335
336         return c;
337 }
338
339 static struct walkqid *ipwalk(struct chan *c, struct chan *nc, char **name,
340                                                           int nname)
341 {
342         struct IPaux *a = c->aux;
343         struct walkqid *w;
344
345         w = devwalk(c, nc, name, nname, NULL, 0, ipgen);
346         if (w != NULL && w->clone != NULL)
347                 w->clone->aux = newipaux(a->owner, a->tag);
348         return w;
349 }
350
351 static int ipstat(struct chan *c, uint8_t * db, int n)
352 {
353         return devstat(c, db, n, NULL, 0, ipgen);
354 }
355
356 static int should_wake(void *arg)
357 {
358         struct conv *cv = arg;
359         /* signal that the conv is closed */
360         if (qisclosed(cv->rq))
361                 return TRUE;
362         return cv->incall != NULL;
363 }
364
365 static int m2p[] = {
366         [OREAD] 4,
367         [OWRITE] 2,
368         [ORDWR] 6
369 };
370
371 static struct chan *ipopen(struct chan *c, int omode)
372 {
373         ERRSTACK(2);
374         struct conv *cv, *nc;
375         struct Proto *p;
376         int perm;
377         struct Fs *f;
378
379         perm = m2p[omode & 3];
380
381         f = ipfs[c->dev];
382
383         switch (TYPE(c->qid)) {
384                 default:
385                         break;
386                 case Qndb:
387                         if (omode & (OWRITE | OTRUNC) && !iseve())
388                                 error(Eperm);
389                         if ((omode & (OWRITE | OTRUNC)) == (OWRITE | OTRUNC))
390                                 f->ndb[0] = 0;
391                         break;
392                 case Qlog:
393                         netlogopen(f);
394                         break;
395                 case Qiprouter:
396                         iprouteropen(f);
397                         break;
398                 case Qiproute:
399                         break;
400                 case Qtopdir:
401                 case Qprotodir:
402                 case Qconvdir:
403                 case Qstatus:
404                 case Qremote:
405                 case Qlocal:
406                 case Qstats:
407                 case Qbootp:
408                 case Qipselftab:
409                         if (!IS_RDONLY(omode))
410                                 error(Eperm);
411                         break;
412                 case Qsnoop:
413                         if (!IS_RDONLY(omode))
414                                 error(Eperm);
415                         p = f->p[PROTO(c->qid)];
416                         cv = p->conv[CONV(c->qid)];
417                         if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
418                                 error(Eperm);
419                         atomic_inc(&cv->snoopers);
420                         break;
421                 case Qclone:
422                         p = f->p[PROTO(c->qid)];
423                         qlock(&p->qlock);
424                         if (waserror()) {
425                                 qunlock(&p->qlock);
426                                 nexterror();
427                         }
428                         cv = Fsprotoclone(p, ATTACHER(c));
429                         qunlock(&p->qlock);
430                         poperror();
431                         if (cv == NULL) {
432                                 error(Enodev);
433                                 break;
434                         }
435                         /* we only honor nonblock on a clone */
436                         if (c->flag & CNONBLOCK)
437                                 Fsconvnonblock(cv, TRUE);
438                         mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
439                         break;
440                 case Qdata:
441                 case Qctl:
442                 case Qerr:
443                         p = f->p[PROTO(c->qid)];
444                         qlock(&p->qlock);
445                         cv = p->conv[CONV(c->qid)];
446                         qlock(&cv->qlock);
447                         if (waserror()) {
448                                 qunlock(&cv->qlock);
449                                 qunlock(&p->qlock);
450                                 nexterror();
451                         }
452                         if ((perm & (cv->perm >> 6)) != perm) {
453                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
454                                         error(Eperm);
455                                 if ((perm & cv->perm) != perm)
456                                         error(Eperm);
457
458                         }
459                         cv->inuse++;
460                         if (cv->inuse == 1) {
461                                 kstrdup(&cv->owner, ATTACHER(c));
462                                 cv->perm = 0660;
463                         }
464                         qunlock(&cv->qlock);
465                         qunlock(&p->qlock);
466                         poperror();
467                         break;
468                 case Qlisten:
469                         cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
470                         if ((perm & (cv->perm >> 6)) != perm) {
471                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
472                                         error(Eperm);
473                                 if ((perm & cv->perm) != perm)
474                                         error(Eperm);
475
476                         }
477
478                         if (cv->state != Announced)
479                                 error("not announced");
480
481                         if (waserror()) {
482                                 closeconv(cv);
483                                 nexterror();
484                         }
485                         qlock(&cv->qlock);
486                         cv->inuse++;
487                         qunlock(&cv->qlock);
488
489                         nc = NULL;
490                         while (nc == NULL) {
491                                 /* give up if we got a hangup */
492                                 if (qisclosed(cv->rq))
493                                         error("listen hungup");
494
495                                 qlock(&cv->listenq);
496                                 if (waserror()) {
497                                         qunlock(&cv->listenq);
498                                         nexterror();
499                                 }
500                                 /* we can peek at incall without grabbing the cv qlock.  if
501                                  * anything is there, it'll remain there until we dequeue it.
502                                  * no one else can, since we hold the listenq lock */
503                                 if (cv->nonblock && !cv->incall) {
504                                         set_errno(EAGAIN);
505                                         error("listen queue empty");
506                                 }
507                                 /* wait for a connect */
508                                 rendez_sleep(&cv->listenr, should_wake, cv);
509
510                                 /* if there is a concurrent hangup, they will hold the qlock
511                                  * until the hangup is complete, including closing the cv->rq */
512                                 qlock(&cv->qlock);
513                                 nc = cv->incall;
514                                 if (nc != NULL) {
515                                         cv->incall = nc->next;
516                                         mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
517                                         kstrdup(&cv->owner, ATTACHER(c));
518                                         /* O_NONBLOCK/CNONBLOCK when opening listen means the *new*
519                                          * conv is already non-blocking, like accept4() in Linux */
520                                         if (c->flag & CNONBLOCK)
521                                                 Fsconvnonblock(nc, TRUE);
522                                 }
523                                 qunlock(&cv->qlock);
524
525                                 qunlock(&cv->listenq);
526                                 poperror();
527                         }
528                         closeconv(cv);
529                         poperror();
530                         break;
531         }
532         c->mode = openmode(omode);
533         c->flag |= COPEN;
534         c->offset = 0;
535         return c;
536 }
537
538 static int ipwstat(struct chan *c, uint8_t * dp, int n)
539 {
540         ERRSTACK(2);
541         struct dir *d;
542         struct conv *cv;
543         struct Fs *f;
544         struct Proto *p;
545
546         f = ipfs[c->dev];
547         switch (TYPE(c->qid)) {
548                 default:
549                         error(Eperm);
550                         break;
551                 case Qctl:
552                 case Qdata:
553                         break;
554         }
555
556         d = kzmalloc(sizeof(*d) + n, 0);
557         if (waserror()) {
558                 kfree(d);
559                 nexterror();
560         }
561         n = convM2D(dp, n, d, (char *)&d[1]);
562         if (n == 0)
563                 error(Eshortstat);
564         p = f->p[PROTO(c->qid)];
565         cv = p->conv[CONV(c->qid)];
566         if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
567                 error(Eperm);
568         if (!emptystr(d->uid))
569                 kstrdup(&cv->owner, d->uid);
570         if (d->mode != ~0UL)
571                 cv->perm = d->mode & 0777;
572         poperror();
573         kfree(d);
574         return n;
575 }
576
577 /* Should be able to handle any file type chan. Feel free to extend it. */
578 static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
579 {
580         struct conv *conv;
581         struct Proto *proto;
582         char *p;
583         struct Fs *f;
584
585         f = ipfs[ch->dev];
586
587         switch (TYPE(ch->qid)) {
588                 default:
589                         ret = "Unknown type";
590                         break;
591                 case Qdata:
592                         proto = f->p[PROTO(ch->qid)];
593                         conv = proto->conv[CONV(ch->qid)];
594                         snprintf(ret, ret_l, "Qdata, proto %s, conv idx %d", proto->name,
595                                          conv->x);
596                         break;
597                 case Qarp:
598                         ret = "Qarp";
599                         break;
600                 case Qiproute:
601                         ret = "Qiproute";
602                         break;
603                 case Qlog:
604                         ret = "Qlog";
605                         break;
606                 case Qndb:
607                         ret = "Qndb";
608                         break;
609                 case Qctl:
610                         proto = f->p[PROTO(ch->qid)];
611                         conv = proto->conv[CONV(ch->qid)];
612                         snprintf(ret, ret_l, "Qctl, proto %s, conv idx %d", proto->name,
613                                          conv->x);
614                         break;
615         }
616         return ret;
617 }
618
619 static void closeconv(struct conv *cv)
620 {
621         struct conv *nc;
622         struct Ipmulti *mp;
623
624         qlock(&cv->qlock);
625
626         if (--cv->inuse > 0) {
627                 qunlock(&cv->qlock);
628                 return;
629         }
630
631         /* close all incoming calls since no listen will ever happen */
632         for (nc = cv->incall; nc; nc = cv->incall) {
633                 cv->incall = nc->next;
634                 closeconv(nc);
635         }
636         cv->incall = NULL;
637
638         kstrdup(&cv->owner, network);
639         cv->perm = 0660;
640
641         while ((mp = cv->multi) != NULL)
642                 ipifcremmulti(cv, mp->ma, mp->ia);
643
644         cv->r = NULL;
645         cv->rgen = 0;
646         cv->p->close(cv);
647         cv->state = Idle;
648         qunlock(&cv->qlock);
649 }
650
651 static void ipclose(struct chan *c)
652 {
653         struct Fs *f;
654
655         f = ipfs[c->dev];
656         switch (TYPE(c->qid)) {
657                 default:
658                         break;
659                 case Qlog:
660                         if (c->flag & COPEN)
661                                 netlogclose(f);
662                         break;
663                 case Qiprouter:
664                         if (c->flag & COPEN)
665                                 iprouterclose(f);
666                         break;
667                 case Qdata:
668                 case Qctl:
669                 case Qerr:
670                         if (c->flag & COPEN)
671                                 closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
672                         break;
673                 case Qsnoop:
674                         if (c->flag & COPEN)
675                                 atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
676                         break;
677         }
678         kfree(((struct IPaux *)c->aux)->owner);
679         kfree(c->aux);
680 }
681
682 enum {
683         Statelen = 32 * 1024,
684 };
685
686 static long ipread(struct chan *ch, void *a, long n, int64_t off)
687 {
688         struct conv *c;
689         struct Proto *x;
690         char *buf, *p;
691         long rv;
692         struct Fs *f;
693         uint32_t offset = off;
694         size_t sofar;
695
696         f = ipfs[ch->dev];
697
698         p = a;
699         switch (TYPE(ch->qid)) {
700                 default:
701                         error(Eperm);
702                 case Qtopdir:
703                 case Qprotodir:
704                 case Qconvdir:
705                         return devdirread(ch, a, n, 0, 0, ipgen);
706                 case Qarp:
707                         return arpread(f->arp, a, offset, n);
708                 case Qbootp:
709                         return bootpread(a, offset, n);
710                 case Qndb:
711                         return readstr(offset, a, n, f->ndb);
712                 case Qiproute:
713                         return routeread(f, a, offset, n);
714                 case Qiprouter:
715                         return iprouterread(f, a, n);
716                 case Qipselftab:
717                         return ipselftabread(f, a, offset, n);
718                 case Qlog:
719                         return netlogread(f, a, offset, n);
720                 case Qctl:
721                         snprintf(get_cur_genbuf(), GENBUF_SZ, "%lu", CONV(ch->qid));
722                         return readstr(offset, p, n, get_cur_genbuf());
723                 case Qremote:
724                         buf = kzmalloc(Statelen, 0);
725                         x = f->p[PROTO(ch->qid)];
726                         c = x->conv[CONV(ch->qid)];
727                         if (x->remote == NULL) {
728                                 snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
729                         } else {
730                                 (*x->remote) (c, buf, Statelen - 2);
731                         }
732                         rv = readstr(offset, p, n, buf);
733                         kfree(buf);
734                         return rv;
735                 case Qlocal:
736                         buf = kzmalloc(Statelen, 0);
737                         x = f->p[PROTO(ch->qid)];
738                         c = x->conv[CONV(ch->qid)];
739                         if (x->local == NULL) {
740                                 snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
741                         } else {
742                                 (*x->local) (c, buf, Statelen - 2);
743                         }
744                         rv = readstr(offset, p, n, buf);
745                         kfree(buf);
746                         return rv;
747                 case Qstatus:
748                         /* this all is a bit screwed up since the size of some state's
749                          * buffers will change from one invocation to another.  a reader
750                          * will come in and read the entire buffer.  then it will come again
751                          * and read from the next offset, expecting EOF.  if the buffer
752                          * changed sizes, it'll reprint the end of the buffer slightly. */
753                         buf = kzmalloc(Statelen, 0);
754                         x = f->p[PROTO(ch->qid)];
755                         c = x->conv[CONV(ch->qid)];
756                         sofar = (*x->state) (c, buf, Statelen - 2);
757                         sofar += snprintf(buf + sofar, Statelen - 2 - sofar, "nonblock %s\n",
758                                           c->nonblock ? "on" : "off");
759                         rv = readstr(offset, p, n, buf);
760                         kfree(buf);
761                         return rv;
762                 case Qdata:
763                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
764                         return qread(c->rq, a, n);
765                 case Qerr:
766                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
767                         return qread(c->eq, a, n);
768                 case Qsnoop:
769                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
770                         return qread(c->sq, a, n);
771                 case Qstats:
772                         x = f->p[PROTO(ch->qid)];
773                         if (x->stats == NULL)
774                                 error("stats not implemented");
775                         buf = kzmalloc(Statelen, 0);
776                         (*x->stats) (x, buf, Statelen);
777                         rv = readstr(offset, p, n, buf);
778                         kfree(buf);
779                         return rv;
780         }
781 }
782
783 static struct block *ipbread(struct chan *ch, long n, uint32_t offset)
784 {
785         struct conv *c;
786         struct Proto *x;
787         struct Fs *f;
788
789         switch (TYPE(ch->qid)) {
790                 case Qdata:
791                         f = ipfs[ch->dev];
792                         x = f->p[PROTO(ch->qid)];
793                         c = x->conv[CONV(ch->qid)];
794                         return qbread(c->rq, n);
795                 default:
796                         return devbread(ch, n, offset);
797         }
798 }
799
800 /*
801  *  set local address to be that of the ifc closest to remote address
802  */
803 static void setladdr(struct conv *c)
804 {
805         findlocalip(c->p->f, c->laddr, c->raddr);
806 }
807
808 /*
809  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
810  */
811 static char *setluniqueport(struct conv *c, int lport)
812 {
813         struct Proto *p;
814         struct conv *xp;
815         int x;
816
817         p = c->p;
818
819         qlock(&p->qlock);
820         for (x = 0; x < p->nc; x++) {
821                 xp = p->conv[x];
822                 if (xp == NULL)
823                         break;
824                 if (xp == c)
825                         continue;
826                 if ((xp->state == Connected || xp->state == Announced)
827                         && xp->lport == lport
828                         && xp->rport == c->rport
829                         && ipcmp(xp->raddr, c->raddr) == 0
830                         && ipcmp(xp->laddr, c->laddr) == 0) {
831                         qunlock(&p->qlock);
832                         return "address in use";
833                 }
834         }
835         c->lport = lport;
836         qunlock(&p->qlock);
837         return NULL;
838 }
839
840 /*
841  *  pick a local port and set it
842  */
843 static void setlport(struct conv *c)
844 {
845         struct Proto *p;
846         uint16_t *pp;
847         int x, found;
848
849         p = c->p;
850         if (c->restricted)
851                 pp = &p->nextrport;
852         else
853                 pp = &p->nextport;
854         qlock(&p->qlock);
855         for (;; (*pp)++) {
856                 /*
857                  * Fsproto initialises p->nextport to 0 and the restricted
858                  * ports (p->nextrport) to 600.
859                  * Restricted ports must lie between 600 and 1024.
860                  * For the initial condition or if the unrestricted port number
861                  * has wrapped round, select a random port between 5000 and 1<<15
862                  * to start at.
863                  */
864                 if (c->restricted) {
865                         if (*pp >= 1024)
866                                 *pp = 600;
867                 } else
868                         while (*pp < 5000)
869                                 *pp = nrand(1 << 15);
870
871                 found = 0;
872                 for (x = 0; x < p->nc; x++) {
873                         if (p->conv[x] == NULL)
874                                 break;
875                         if (p->conv[x]->lport == *pp) {
876                                 found = 1;
877                                 break;
878                         }
879                 }
880                 if (!found)
881                         break;
882         }
883         c->lport = (*pp)++;
884         qunlock(&p->qlock);
885 }
886
887 /*
888  *  set a local address and port from a string of the form
889  *      [address!]port[!r]
890  */
891 static char *setladdrport(struct conv *c, char *str, int announcing)
892 {
893         char *p;
894         char *rv;
895         uint16_t lport;
896         uint8_t addr[IPaddrlen];
897
898         rv = NULL;
899
900         /*
901          *  ignore restricted part if it exists.  it's
902          *  meaningless on local ports.
903          */
904         p = strchr(str, '!');
905         if (p != NULL) {
906                 *p++ = 0;
907                 if (strcmp(p, "r") == 0)
908                         p = NULL;
909         }
910
911         c->lport = 0;
912         if (p == NULL) {
913                 if (announcing)
914                         ipmove(c->laddr, IPnoaddr);
915                 else
916                         setladdr(c);
917                 p = str;
918         } else {
919                 if (strcmp(str, "*") == 0)
920                         ipmove(c->laddr, IPnoaddr);
921                 else {
922                         parseip(addr, str);
923                         if (ipforme(c->p->f, addr))
924                                 ipmove(c->laddr, addr);
925                         else
926                                 return "not a local IP address";
927                 }
928         }
929
930         /* one process can get all connections */
931         if (announcing && strcmp(p, "*") == 0) {
932                 if (!iseve())
933                         error(Eperm);
934                 return setluniqueport(c, 0);
935         }
936
937         lport = atoi(p);
938         if (lport <= 0)
939                 setlport(c);
940         else
941                 rv = setluniqueport(c, lport);
942         return rv;
943 }
944
945 static char *setraddrport(struct conv *c, char *str)
946 {
947         char *p;
948
949         p = strchr(str, '!');
950         if (p == NULL)
951                 return "malformed address";
952         *p++ = 0;
953         parseip(c->raddr, str);
954         c->rport = atoi(p);
955         p = strchr(p, '!');
956         if (p) {
957                 if (strstr(p, "!r") != NULL)
958                         c->restricted = 1;
959         }
960         return NULL;
961 }
962
963 /*
964  *  called by protocol connect routine to set addresses
965  */
966 char *Fsstdconnect(struct conv *c, char *argv[], int argc)
967 {
968         char *p;
969
970         switch (argc) {
971                 default:
972                         return "bad args to connect";
973                 case 2:
974                         p = setraddrport(c, argv[1]);
975                         if (p != NULL)
976                                 return p;
977                         setladdr(c);
978                         setlport(c);
979                         break;
980                 case 3:
981                         p = setraddrport(c, argv[1]);
982                         if (p != NULL)
983                                 return p;
984                         p = setladdrport(c, argv[2], 0);
985                         if (p != NULL)
986                                 return p;
987         }
988
989         if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
990                  memcmp(c->laddr, v4prefix, IPv4off) == 0)
991                 || ipcmp(c->raddr, IPnoaddr) == 0)
992                 c->ipversion = V4;
993         else
994                 c->ipversion = V6;
995
996         return NULL;
997 }
998
999 /*
1000  *  initiate connection and sleep till its set up
1001  */
1002 static int connected(void *a)
1003 {
1004         return ((struct conv *)a)->state == Connected;
1005 }
1006
1007 static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1008 {
1009         ERRSTACK(1);
1010         char *p;
1011
1012         if (c->state != 0)
1013                 error(Econinuse);
1014         c->state = Connecting;
1015         c->cerr[0] = '\0';
1016         if (x->connect == NULL)
1017                 error("connect not supported");
1018         p = x->connect(c, cb->f, cb->nf);
1019         if (p != NULL)
1020                 error(p);
1021
1022         qunlock(&c->qlock);
1023         if (waserror()) {
1024                 qlock(&c->qlock);
1025                 nexterror();
1026         }
1027         rendez_sleep(&c->cr, connected, c);
1028         qlock(&c->qlock);
1029         poperror();
1030
1031         if (c->cerr[0] != '\0')
1032                 error(c->cerr);
1033 }
1034
1035 /*
1036  *  called by protocol announce routine to set addresses
1037  */
1038 char *Fsstdannounce(struct conv *c, char *argv[], int argc)
1039 {
1040         memset(c->raddr, 0, sizeof(c->raddr));
1041         c->rport = 0;
1042         switch (argc) {
1043                 default:
1044                         return "bad args to announce";
1045                 case 2:
1046                         return setladdrport(c, argv[1], 1);
1047         }
1048 }
1049
1050 /*
1051  *  initiate announcement and sleep till its set up
1052  */
1053 static int announced(void *a)
1054 {
1055         return ((struct conv *)a)->state == Announced;
1056 }
1057
1058 static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1059 {
1060         ERRSTACK(1);
1061         char *p;
1062
1063         if (c->state != 0)
1064                 error(Econinuse);
1065         c->state = Announcing;
1066         c->cerr[0] = '\0';
1067         if (x->announce == NULL)
1068                 error("announce not supported");
1069         p = x->announce(c, cb->f, cb->nf);
1070         if (p != NULL)
1071                 error(p);
1072
1073         qunlock(&c->qlock);
1074         if (waserror()) {
1075                 qlock(&c->qlock);
1076                 nexterror();
1077         }
1078         rendez_sleep(&c->cr, announced, c);
1079         qlock(&c->qlock);
1080         poperror();
1081
1082         if (c->cerr[0] != '\0')
1083                 error(c->cerr);
1084 }
1085
/*
 *  Called by a protocol's bind routine to set the addresses.
 *
 *  argv[1] is the local "[address!]port[!r]", parsed in non-announcing
 *  mode.  Returns NULL on success or a static error string.
 */
char *Fsstdbind(struct conv *c, char *argv[], int argc)
{
	if (argc == 2)
		return setladdrport(c, argv[1], 0);
	return "bad args to bind";
}
1098
1099 void Fsconvnonblock(struct conv *cv, bool onoff)
1100 {
1101         qnonblock(cv->wq, onoff);
1102         qnonblock(cv->rq, onoff);
1103         cv->nonblock = onoff;
1104 }
1105
1106 static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1107 {
1108         char *p;
1109
1110         if (x->bind == NULL)
1111                 p = Fsstdbind(c, cb->f, cb->nf);
1112         else
1113                 p = x->bind(c, cb->f, cb->nf);
1114         if (p != NULL)
1115                 error(p);
1116 }
1117
1118 static void nonblockctlmsg(struct conv *c, struct cmdbuf *cb)
1119 {
1120         if (cb->nf < 2)
1121                 goto err;
1122         if (!strcmp(cb->f[1], "on"))
1123                 Fsconvnonblock(c, TRUE);
1124         else if (!strcmp(cb->f[1], "off"))
1125                 Fsconvnonblock(c, FALSE);
1126         else
1127                 goto err;
1128         return;
1129 err:
1130         set_errno(EINVAL);
1131         error("nonblock [on|off]");
1132 }
1133
1134 static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
1135 {
1136         if (cb->nf < 2)
1137                 c->tos = 0;
1138         else
1139                 c->tos = atoi(cb->f[1]);
1140 }
1141
1142 static void ttlctlmsg(struct conv *c, struct cmdbuf *cb)
1143 {
1144         if (cb->nf < 2)
1145                 c->ttl = MAXTTL;
1146         else
1147                 c->ttl = atoi(cb->f[1]);
1148 }
1149
1150 static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
1151 {
1152         ERRSTACK(1);
1153         struct conv *c;
1154         struct Proto *x;
1155         char *p;
1156         struct cmdbuf *cb;
1157         uint8_t ia[IPaddrlen], ma[IPaddrlen];
1158         struct Fs *f;
1159         char *a;
1160
1161         a = v;
1162         f = ipfs[ch->dev];
1163
1164         switch (TYPE(ch->qid)) {
1165                 default:
1166                         error(Eperm);
1167                 case Qdata:
1168                         x = f->p[PROTO(ch->qid)];
1169                         c = x->conv[CONV(ch->qid)];
1170
1171                         if (c->wq == NULL)
1172                                 error(Eperm);
1173
1174                         qwrite(c->wq, a, n);
1175                         break;
1176                 case Qarp:
1177                         return arpwrite(f, a, n);
1178                 case Qiproute:
1179                         return routewrite(f, ch, a, n);
1180                 case Qlog:
1181                         netlogctl(f, a, n);
1182                         return n;
1183                 case Qndb:
1184                         return ndbwrite(f, a, off, n);
1185                 case Qctl:
1186                         x = f->p[PROTO(ch->qid)];
1187                         c = x->conv[CONV(ch->qid)];
1188                         cb = parsecmd(a, n);
1189
1190                         qlock(&c->qlock);
1191                         if (waserror()) {
1192                                 qunlock(&c->qlock);
1193                                 kfree(cb);
1194                                 nexterror();
1195                         }
1196                         if (cb->nf < 1)
1197                                 error("short control request");
1198                         if (strcmp(cb->f[0], "connect") == 0)
1199                                 connectctlmsg(x, c, cb);
1200                         else if (strcmp(cb->f[0], "announce") == 0)
1201                                 announcectlmsg(x, c, cb);
1202                         else if (strcmp(cb->f[0], "bind") == 0)
1203                                 bindctlmsg(x, c, cb);
1204                         else if (strcmp(cb->f[0], "nonblock") == 0)
1205                                 nonblockctlmsg(c, cb);
1206                         else if (strcmp(cb->f[0], "ttl") == 0)
1207                                 ttlctlmsg(c, cb);
1208                         else if (strcmp(cb->f[0], "tos") == 0)
1209                                 tosctlmsg(c, cb);
1210                         else if (strcmp(cb->f[0], "ignoreadvice") == 0)
1211                                 c->ignoreadvice = 1;
1212                         else if (strcmp(cb->f[0], "addmulti") == 0) {
1213                                 if (cb->nf < 2)
1214                                         error("addmulti needs interface address");
1215                                 if (cb->nf == 2) {
1216                                         if (!ipismulticast(c->raddr))
1217                                                 error("addmulti for a non multicast address");
1218                                         parseip(ia, cb->f[1]);
1219                                         ipifcaddmulti(c, c->raddr, ia);
1220                                 } else {
1221                                         parseip(ma, cb->f[2]);
1222                                         if (!ipismulticast(ma))
1223                                                 error("addmulti for a non multicast address");
1224                                         parseip(ia, cb->f[1]);
1225                                         ipifcaddmulti(c, ma, ia);
1226                                 }
1227                         } else if (strcmp(cb->f[0], "remmulti") == 0) {
1228                                 if (cb->nf < 2)
1229                                         error("remmulti needs interface address");
1230                                 if (!ipismulticast(c->raddr))
1231                                         error("remmulti for a non multicast address");
1232                                 parseip(ia, cb->f[1]);
1233                                 ipifcremmulti(c, c->raddr, ia);
1234                         } else if (x->ctl != NULL) {
1235                                 p = x->ctl(c, cb->f, cb->nf);
1236                                 if (p != NULL)
1237                                         error(p);
1238                         } else
1239                                 error("unknown control request");
1240                         qunlock(&c->qlock);
1241                         kfree(cb);
1242                         poperror();
1243         }
1244         return n;
1245 }
1246
1247 static long ipbwrite(struct chan *ch, struct block *bp, uint32_t offset)
1248 {
1249         struct conv *c;
1250         struct Proto *x;
1251         struct Fs *f;
1252         int n;
1253
1254         switch (TYPE(ch->qid)) {
1255                 case Qdata:
1256                         f = ipfs[ch->dev];
1257                         x = f->p[PROTO(ch->qid)];
1258                         c = x->conv[CONV(ch->qid)];
1259
1260                         if (c->wq == NULL)
1261                                 error(Eperm);
1262
1263                         if (bp->next)
1264                                 bp = concatblock(bp);
1265                         n = BLEN(bp);
1266                         qbwrite(c->wq, bp);
1267                         return n;
1268                 default:
1269                         return devbwrite(ch, bp, offset);
1270         }
1271 }
1272
/*
 *  Device table entry for the IP device: device character 'I', name "ip",
 *  followed by its handler functions.  The initializers are positional, so
 *  their order must match the field order of struct dev (declared outside
 *  this file).  NOTE(review): the ip* entries are defined in this file; the
 *  dev* entries are presumably the generic defaults - confirm against the
 *  struct dev declaration before reordering or inserting anything.
 */
struct dev ipdevtab __devtab = {
	'I',
	"ip",

	ipreset,
	ipinit,
	devshutdown,
	ipattach,
	ipwalk,
	ipstat,
	ipopen,
	devcreate,
	ipclose,
	ipread,
	ipbread,
	ipwrite,
	ipbwrite,
	devremove,
	ipwstat,
	devpower,
	ipchaninfo,
};
1295
1296 int Fsproto(struct Fs *f, struct Proto *p)
1297 {
1298         if (f->np >= Maxproto)
1299                 return -1;
1300
1301         qlock_init(&p->qlock);
1302         p->f = f;
1303
1304         if (p->ipproto > 0) {
1305                 if (f->t2p[p->ipproto] != NULL)
1306                         return -1;
1307                 f->t2p[p->ipproto] = p;
1308         }
1309
1310         p->qid.type = QTDIR;
1311         p->qid.path = QID(f->np, 0, Qprotodir);
1312         p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
1313         if (p->conv == NULL)
1314                 panic("Fsproto");
1315
1316         p->x = f->np;
1317         p->nextport = 0;
1318         p->nextrport = 600;
1319         f->p[f->np++] = p;
1320
1321         return 0;
1322 }
1323
1324 /*
1325  *  return true if this protocol is
1326  *  built in
1327  */
1328 int Fsbuiltinproto(struct Fs *f, uint8_t proto)
1329 {
1330         return f->t2p[proto] != NULL;
1331 }
1332
1333 /*
1334  *  called with protocol locked
1335  */
1336 struct conv *Fsprotoclone(struct Proto *p, char *user)
1337 {
1338         struct conv *c, **pp, **ep;
1339
1340 retry:
1341         c = NULL;
1342         ep = &p->conv[p->nc];
1343         for (pp = p->conv; pp < ep; pp++) {
1344                 c = *pp;
1345                 if (c == NULL) {
1346                         c = kzmalloc(sizeof(struct conv), 0);
1347                         if (c == NULL)
1348                                 error(Enomem);
1349                         qlock_init(&c->qlock);
1350                         qlock_init(&c->listenq);
1351                         rendez_init(&c->cr);
1352                         rendez_init(&c->listenr);
1353                         qlock(&c->qlock);
1354                         c->p = p;
1355                         c->x = pp - p->conv;
1356                         if (p->ptclsize != 0) {
1357                                 c->ptcl = kzmalloc(p->ptclsize, 0);
1358                                 if (c->ptcl == NULL) {
1359                                         kfree(c);
1360                                         error(Enomem);
1361                                 }
1362                         }
1363                         *pp = c;
1364                         p->ac++;
1365                         c->eq = qopen(1024, Qmsg, 0, 0);
1366                         (*p->create) (c);
1367                         break;
1368                 }
1369                 if (canqlock(&c->qlock)) {
1370                         /*
1371                          *  make sure both processes and protocol
1372                          *  are done with this Conv
1373                          */
1374                         if (c->inuse == 0 && (p->inuse == NULL || (*p->inuse) (c) == 0))
1375                                 break;
1376
1377                         qunlock(&c->qlock);
1378                 }
1379         }
1380         if (pp >= ep) {
1381                 if (p->gc != NULL && (*p->gc) (p))
1382                         goto retry;
1383                 return NULL;
1384         }
1385
1386         c->inuse = 1;
1387         kstrdup(&c->owner, user);
1388         c->perm = 0660;
1389         c->state = Idle;
1390         ipmove(c->laddr, IPnoaddr);
1391         ipmove(c->raddr, IPnoaddr);
1392         c->r = NULL;
1393         c->rgen = 0;
1394         c->lport = 0;
1395         c->rport = 0;
1396         c->restricted = 0;
1397         c->ttl = MAXTTL;
1398         c->tos = DFLTTOS;
1399         c->nonblock = FALSE;
1400         qreopen(c->rq);
1401         qreopen(c->wq);
1402         qreopen(c->eq);
1403
1404         qunlock(&c->qlock);
1405         return c;
1406 }
1407
1408 int Fsconnected(struct conv *c, char *msg)
1409 {
1410         if (msg != NULL && *msg != '\0')
1411                 strncpy(c->cerr, msg, sizeof(c->cerr));
1412
1413         switch (c->state) {
1414
1415                 case Announcing:
1416                         c->state = Announced;
1417                         break;
1418
1419                 case Connecting:
1420                         c->state = Connected;
1421                         break;
1422         }
1423
1424         rendez_wakeup(&c->cr);
1425         return 0;
1426 }
1427
1428 struct Proto *Fsrcvpcol(struct Fs *f, uint8_t proto)
1429 {
1430         if (f->ipmux)
1431                 return f->ipmux;
1432         else
1433                 return f->t2p[proto];
1434 }
1435
1436 struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
1437 {
1438         return f->t2p[proto];
1439 }
1440
1441 /*
1442  *  called with protocol locked
1443  */
1444 struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
1445                                            uint8_t * laddr, uint16_t lport, uint8_t version)
1446 {
1447         struct conv *nc;
1448         struct conv **l;
1449         int i;
1450
1451         qlock(&c->qlock);
1452         i = 0;
1453         for (l = &c->incall; *l; l = &(*l)->next)
1454                 i++;
1455         if (i >= Maxincall) {
1456                 qunlock(&c->qlock);
1457                 return NULL;
1458         }
1459
1460         /* find a free conversation */
1461         nc = Fsprotoclone(c->p, network);
1462         if (nc == NULL) {
1463                 qunlock(&c->qlock);
1464                 return NULL;
1465         }
1466         ipmove(nc->raddr, raddr);
1467         nc->rport = rport;
1468         ipmove(nc->laddr, laddr);
1469         nc->lport = lport;
1470         nc->next = NULL;
1471         *l = nc;
1472         nc->state = Connected;
1473         nc->ipversion = version;
1474
1475         qunlock(&c->qlock);
1476
1477         rendez_wakeup(&c->listenr);
1478
1479         return nc;
1480 }
1481
1482 static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
1483 {
1484         if (off > strlen(f->ndb))
1485                 error(Eio);
1486         if (off + n >= sizeof(f->ndb) - 1)
1487                 error(Eio);
1488         memmove(f->ndb + off, a, n);
1489         f->ndb[off + n] = 0;
1490         f->ndbvers++;
1491         f->ndbmtime = seconds();
1492         return n;
1493 }
1494
1495 uint32_t scalednconv(void)
1496 {
1497         //if(conf.npage*BY2PG >= 128*MB)
1498         return Nchans * 4;
1499         //  return Nchans;
1500 }