Replaces snoopers with an atomic
[akaros.git] / kern / src / net / devip.c
1 // INFERNO
2 #include <vfs.h>
3 #include <kfs.h>
4 #include <slab.h>
5 #include <kmalloc.h>
6 #include <kref.h>
7 #include <string.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include <error.h>
11 #include <cpio.h>
12 #include <pmap.h>
13 #include <smp.h>
14 #include <ip.h>
15
/*
 * Qid path layout for the IP device.
 *
 * A qid path packs three fields: the file type in the low Logtype bits,
 * the conversation index in the next Logconv bits and the protocol index
 * in the Logproto bits above that.  The Q* constants are the file types;
 * each *base constant aliases the first file of its directory level so the
 * generators can turn a directory slot into a type with "slot + base".
 */
enum {
	Qtopdir = 1,		/* top level directory */
	Qtopbase,
	Qarp = Qtopbase,
	Qbootp,
	Qndb,
	Qiproute,
	Qiprouter,
	Qipselftab,
	Qlog,

	Qprotodir,		/* directory for a protocol */
	Qprotobase,
	Qclone = Qprotobase,
	Qstats,

	Qconvdir,		/* directory for a conversation */
	Qconvbase,
	Qctl = Qconvbase,
	Qdata,
	Qerr,
	Qlisten,
	Qlocal,
	Qremote,
	Qstatus,
	Qsnoop,

	Logtype = 5,
	Masktype = (1 << Logtype) - 1,
	Logconv = 12,
	Maskconv = (1 << Logconv) - 1,
	Shiftconv = Logtype,
	Logproto = 8,
	Maskproto = (1 << Logproto) - 1,
	Shiftproto = Logtype + Logconv,

	Nfs = 32,		/* number of slots in ipfs[] */
};

/* Field extractors: x is anything with a .path member (a struct qid). */
#define TYPE(x)		(((uint32_t)(x).path) & Masktype)
#define CONV(x)		((((uint32_t)(x).path) >> Shiftconv) & Maskconv)
#define PROTO(x)	((((uint32_t)(x).path) >> Shiftproto) & Maskproto)
/*
 * Build a path from protocol p, conversation c and file type y.  Every
 * argument is parenthesized so that an expression such as "a ? b : c"
 * passed as y is OR-ed in as a whole instead of swallowing the p/c fields.
 */
#define QID(p, c, y)	(((p) << (Shiftproto)) | ((c) << (Shiftconv)) | (y))
59 static char network[] = "network";
60
61 qlock_t fslock;
62 struct Fs       *ipfs[Nfs];     /* attached fs's */
63 struct queue    *qlog;
64
65 extern  void nullmediumlink(void);
66 extern  void pktmediumlink(void);
67 static  long ndbwrite(struct Fs*, char *unused_char_p_t, uint32_t, int);
68 static  void    closeconv(struct conv*);
69
70 static int
71 ip3gen(struct chan *c, int i, struct dir *dp)
72 {
73         struct qid q;
74         struct conv *cv;
75         char *p;
76
77         cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
78         if(cv->owner == NULL)
79                 kstrdup(&cv->owner, eve);
80         mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
81
82         switch(i) {
83         default:
84                 return -1;
85         case Qctl:
86                 devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp);
87                 return 1;
88         case Qdata:
89                 devdir(c, q, "data", qlen(cv->rq), cv->owner, cv->perm, dp);
90                 return 1;
91         case Qerr:
92                 devdir(c, q, "err", qlen(cv->eq), cv->owner, cv->perm, dp);
93                 return 1;
94         case Qlisten:
95                 devdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
96                 return 1;
97         case Qlocal:
98                 p = "local";
99                 break;
100         case Qremote:
101                 p = "remote";
102                 break;
103         case Qsnoop:
104                 if(strcmp(cv->p->name, "ipifc") != 0)
105                         return -1;
106                 devdir(c, q, "snoop", qlen(cv->sq), cv->owner, 0400, dp);
107                 return 1;
108         case Qstatus:
109                 p = "status";
110                 break;
111         }
112         devdir(c, q, p, 0, cv->owner, 0444, dp);
113         return 1;
114 }
115
116 static int
117 ip2gen(struct chan *c, int i, struct dir *dp)
118 {
119         struct qid q;
120
121         switch(i) {
122         case Qclone:
123                 mkqid(&q, QID(PROTO(c->qid), 0, Qclone), 0, QTFILE);
124                 devdir(c, q, "clone", 0, network, 0666, dp);
125                 return 1;
126         case Qstats:
127                 mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
128                 devdir(c, q, "stats", 0, network, 0444, dp);
129                 return 1;
130         }       
131         return -1;
132 }
133
134 static int
135 ip1gen(struct chan *c, int i, struct dir *dp)
136 {
137         struct qid q;
138         char *p;
139         int prot;
140         int len = 0;
141         struct Fs *f;
142         extern uint32_t kerndate;
143
144         f = ipfs[c->dev];
145
146         prot = 0666;
147         mkqid(&q, QID(0, 0, i), 0, QTFILE);
148         switch(i) {
149         default:
150                 return -1;
151         case Qarp:
152                 p = "arp";
153                 break;
154         case Qbootp:
155                 p = "bootp";
156                 if(bootp == NULL)
157                         return 0;
158                 break;
159         case Qndb:
160                 p = "ndb";
161                 len = strlen(f->ndb);
162                 q.vers = f->ndbvers;
163                 break;
164         case Qiproute:
165                 p = "iproute";
166                 break;
167         case Qipselftab:
168                 p = "ipselftab";
169                 prot = 0444;
170                 break;
171         case Qiprouter:
172                 p = "iprouter";
173                 break;
174         case Qlog:
175                 p = "log";
176                 break;
177         }
178         devdir(c, q, p, len, network, prot, dp);
179         if(i == Qndb && f->ndbmtime > kerndate)
180                 dp->mtime = f->ndbmtime;
181         return 1;
182 }
183
184 static int
185 ipgen(struct chan *c, char *unused_char_p_t, struct dirtab*d, int unused_int, int s, struct dir *dp)
186 {
187         struct qid q;
188         struct conv *cv;
189         struct Fs *f;
190
191         f = ipfs[c->dev];
192
193         switch(TYPE(c->qid)) {
194         case Qtopdir:
195                 if(s == DEVDOTDOT){
196                         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
197                         snprintf(get_cur_genbuf(), GENBUF_SZ, "#I%lud", c->dev);
198                         devdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
199                         return 1;
200                 }
201                 if(s < f->np) {
202                         if(f->p[s]->connect == NULL)
203                                 return 0;       /* protocol with no user interface */
204                         mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
205                         devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
206                         return 1;
207                 }
208                 s -= f->np;
209                 return ip1gen(c, s+Qtopbase, dp);
210         case Qarp:
211         case Qbootp:
212         case Qndb:
213         case Qlog:
214         case Qiproute:
215         case Qiprouter:
216         case Qipselftab:
217                 return ip1gen(c, TYPE(c->qid), dp);
218         case Qprotodir:
219                 if(s == DEVDOTDOT){
220                         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
221                         snprintf(get_cur_genbuf(), GENBUF_SZ, "#I%lud", c->dev);
222                         devdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
223                         return 1;
224                 }
225                 if(s < f->p[PROTO(c->qid)]->ac) {
226                         cv = f->p[PROTO(c->qid)]->conv[s];
227                         snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
228                         mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
229                         devdir(c, q, get_cur_genbuf(), 0, cv->owner, 0555, dp);
230                         return 1;
231                 }
232                 s -= f->p[PROTO(c->qid)]->ac;
233                 return ip2gen(c, s+Qprotobase, dp);
234         case Qclone:
235         case Qstats:
236                 return ip2gen(c, TYPE(c->qid), dp);
237         case Qconvdir:
238                 if(s == DEVDOTDOT){
239                         s = PROTO(c->qid);
240                         mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
241                         devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
242                         return 1;
243                 }
244                 return ip3gen(c, s+Qconvbase, dp);
245         case Qctl:
246         case Qdata:
247         case Qerr:
248         case Qlisten:
249         case Qlocal:
250         case Qremote:
251         case Qstatus:
252         case Qsnoop:
253                 return ip3gen(c, TYPE(c->qid), dp);
254         }
255         return -1;
256 }
257
/*
 * Device reset hook: registers the null and pkt media.
 *
 * The Plan 9 original also installed eipfmt for the 'i', 'I', 'E', 'V'
 * and 'M' format verbs via fmtinstall(); that is not done here ("if only").
 */
static void ipreset(void)
{
	nullmediumlink();
	pktmediumlink();
}
271
272 static struct Fs*
273 ipgetfs(int dev)
274 {
275         extern void (*ipprotoinit[])(struct Fs*);
276         struct Fs *f;
277         int i;
278
279         if(dev >= Nfs)
280                 return NULL;
281
282         qlock(&fslock);
283         if(ipfs[dev] == NULL){
284                 f = kzmalloc(sizeof(struct Fs), 0);
285                 ip_init(f);
286                 arpinit(f);
287                 netloginit(f);
288                 for(i = 0; ipprotoinit[i]; i++)
289                         ipprotoinit[i](f);
290                 f->dev = dev;
291                 ipfs[dev] = f;
292         }
293         qunlock(&fslock);
294
295         return ipfs[dev];
296 }
297
298 struct IPaux*
299 newipaux(char *owner, char *tag)
300 {
301         struct IPaux *a;
302         int n;
303
304         a = kzmalloc(sizeof(*a), 0);
305         kstrdup(&a->owner, owner);
306         memset(a->tag, ' ', sizeof(a->tag));
307         n = strlen(tag);
308         if(n > sizeof(a->tag))
309                 n = sizeof(a->tag);
310         memmove(a->tag, tag, n);
311         return a;
312 }
313
314 #define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)
315
316 static struct chan*
317 ipattach(char* spec)
318 {
319         struct chan *c;
320         int dev;
321
322         dev = atoi(spec);
323         if(dev >= Nfs)
324                 error("bad specification");
325
326         ipgetfs(dev);
327         c = devattach('I', spec);
328         mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
329         c->dev = dev;
330
331         c->aux = newipaux(commonuser(), "none");
332
333         return c;
334 }
335
336 static struct walkqid*
337 ipwalk(struct chan* c, struct chan *nc, char **name, int nname)
338 {
339         struct IPaux *a = c->aux;
340         struct walkqid* w;
341
342         w = devwalk(c, nc, name, nname, NULL, 0, ipgen);
343         if(w != NULL && w->clone != NULL)
344                 w->clone->aux = newipaux(a->owner, a->tag);
345         return w;
346 }
347
348 static int
349 ipstat(struct chan* c, uint8_t* db, int n)
350 {
351         return devstat(c, db, n, NULL, 0, ipgen);
352 }
353
354 static int
355 incoming(void* arg)
356 {
357         struct conv *conv;
358
359         conv = arg;
360         return conv->incall != NULL;
361 }
362
363 static int m2p[] = {
364         [OREAD]         4,
365         [OWRITE]        2,
366         [ORDWR]         6
367 };
368
369 static struct chan*
370 ipopen(struct chan* c, int omode)
371 {
372         ERRSTACK(2);
373         struct conv *cv, *nc;
374         struct Proto *p;
375         int perm;
376         struct Fs *f;
377
378         perm = m2p[omode&3];
379
380         f = ipfs[c->dev];
381
382         switch(TYPE(c->qid)) {
383         default:
384                 break;
385         case Qndb:
386                 if(omode & (OWRITE|OTRUNC) && !iseve())
387                         error(Eperm);
388                 if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
389                         f->ndb[0] = 0;
390                 break;
391         case Qlog:
392                 netlogopen(f);
393                 break;
394         case Qiprouter:
395                 iprouteropen(f);
396                 break;
397         case Qiproute:
398                 break;
399         case Qtopdir:
400         case Qprotodir:
401         case Qconvdir:
402         case Qstatus:
403         case Qremote:
404         case Qlocal:
405         case Qstats:
406         case Qbootp:
407         case Qipselftab:
408                 if(omode != OREAD)
409                         error(Eperm);
410                 break;
411         case Qsnoop:
412                 if(omode != OREAD)
413                         error(Eperm);
414                 p = f->p[PROTO(c->qid)];
415                 cv = p->conv[CONV(c->qid)];
416                 if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
417                         error(Eperm);
418                 atomic_inc(&cv->snoopers);
419                 break;
420         case Qclone:
421                 p = f->p[PROTO(c->qid)];
422                 qlock(&p->qlock);
423                 if(waserror()){
424                         qunlock(&p->qlock);
425                         nexterror();
426                 }
427                 cv = Fsprotoclone(p, ATTACHER(c));
428                 qunlock(&p->qlock);
429                 poperror();
430                 if(cv == NULL) {
431                         error(Enodev);
432                         break;
433                 }
434                 mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
435                 break;
436         case Qdata:
437         case Qctl:
438         case Qerr:
439                 p = f->p[PROTO(c->qid)];
440                 qlock(&p->qlock);
441                 cv = p->conv[CONV(c->qid)];
442                 qlock(&cv->qlock);
443                 if(waserror()) {
444                         qunlock(&cv->qlock);
445                         qunlock(&p->qlock);
446                         nexterror();
447                 }
448                 if((perm & (cv->perm>>6)) != perm) {
449                         if(strcmp(ATTACHER(c), cv->owner) != 0)
450                                 error(Eperm);
451                         if((perm & cv->perm) != perm)
452                                 error(Eperm); 
453
454                 }
455                 cv->inuse++;
456                 if(cv->inuse == 1){
457                         kstrdup(&cv->owner, ATTACHER(c));
458                         cv->perm = 0660;
459                 }
460                 qunlock(&cv->qlock);
461                 qunlock(&p->qlock);
462                 poperror();
463                 break;
464         case Qlisten:
465                 cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
466                 if((perm & (cv->perm>>6)) != perm) {
467                         if(strcmp(ATTACHER(c), cv->owner) != 0)
468                                 error(Eperm);
469                         if((perm & cv->perm) != perm)
470                                 error(Eperm); 
471
472                 }
473
474                 if(cv->state != Announced)
475                         error("not announced");
476
477                 if(waserror()){
478                         closeconv(cv);
479                         nexterror();
480                 }
481                 qlock(&cv->qlock);
482                 cv->inuse++;
483                 qunlock(&cv->qlock);
484
485                 nc = NULL;
486                 while(nc == NULL) {
487                         /* give up if we got a hangup */
488                         if(qisclosed(cv->rq))
489                                 error("listen hungup");
490
491                         qlock(&cv->listenq);
492                         if(waserror()) {
493                                 qunlock(&cv->listenq);
494                                 nexterror();
495                         }
496
497                         /* wait for a connect */
498                         rendez_sleep(&cv->listenr, incoming, cv);
499
500                         qlock(&cv->qlock);
501                         nc = cv->incall;
502                         if(nc != NULL){
503                                 cv->incall = nc->next;
504                                 mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
505                                 kstrdup(&cv->owner, ATTACHER(c));
506                         }
507                         qunlock(&cv->qlock);
508
509                         qunlock(&cv->listenq);
510                         poperror();
511                 }
512                 closeconv(cv);
513                 poperror();
514                 break;
515         }
516         c->mode = openmode(omode);
517         c->flag |= COPEN;
518         c->offset = 0;
519         return c;
520 }
521
522 static int
523 ipwstat(struct chan *c, uint8_t *dp, int n)
524 {
525         ERRSTACK(2);
526         struct dir *d;
527         struct conv *cv;
528         struct Fs *f;
529         struct Proto *p;
530
531         f = ipfs[c->dev];
532         switch(TYPE(c->qid)) {
533         default:
534                 error(Eperm);
535                 break;
536         case Qctl:
537         case Qdata:
538                 break;
539         }
540
541         d = kzmalloc(sizeof(*d) + n, 0);
542         if(waserror()){
543                 kfree(d);
544                 nexterror();
545         }
546         n = convM2D(dp, n, d, ( char *)&d[1]);
547         if(n == 0)
548                 error(Eshortstat);
549         p = f->p[PROTO(c->qid)];
550         cv = p->conv[CONV(c->qid)];
551         if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
552                 error(Eperm);
553         if(!emptystr(d->uid))
554                 kstrdup(&cv->owner, d->uid);
555         if(d->mode != ~0UL)
556                 cv->perm = d->mode & 0777;
557         poperror();
558         kfree(d);
559         return n;
560 }
561
562 static void
563 closeconv(struct conv *cv)
564 {
565         struct conv *nc;
566         struct Ipmulti *mp;
567
568         qlock(&cv->qlock);
569
570         if(--cv->inuse > 0) {
571                 qunlock(&cv->qlock);
572                 return;
573         }
574
575         /* close all incoming calls since no listen will ever happen */
576         for(nc = cv->incall; nc; nc = cv->incall){
577                 cv->incall = nc->next;
578                 closeconv(nc);
579         }
580         cv->incall = NULL;
581
582         kstrdup(&cv->owner, network);
583         cv->perm = 0660;
584
585         while((mp = cv->multi) != NULL)
586                 ipifcremmulti(cv, mp->ma, mp->ia);
587
588         cv->r = NULL;
589         cv->rgen = 0;
590         cv->p->close(cv);
591         cv->state = Idle;
592         qunlock(&cv->qlock);
593 }
594
595 static void
596 ipclose(struct chan* c)
597 {
598         struct Fs *f;
599
600         f = ipfs[c->dev];
601         switch(TYPE(c->qid)) {
602         default:
603                 break;
604         case Qlog:
605                 if(c->flag & COPEN)
606                         netlogclose(f);
607                 break;
608         case Qiprouter:
609                 if(c->flag & COPEN)
610                         iprouterclose(f);
611                 break;
612         case Qdata:
613         case Qctl:
614         case Qerr:
615                 if(c->flag & COPEN)
616                         closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
617                 break;
618         case Qsnoop:
619                 if(c->flag & COPEN)
620                         atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
621                 break;
622         }
623         kfree(((struct IPaux*)c->aux)->owner);
624         kfree(c->aux);
625 }
626
627 enum
628 {
629         Statelen=       32*1024,
630 };
631
632 static long
633 ipread(struct chan *ch, void *a, long n, int64_t off)
634 {
635         struct conv *c;
636         struct Proto *x;
637         char *buf, *p;
638         long rv;
639         struct Fs *f;
640         uint32_t offset = off;
641
642         f = ipfs[ch->dev];
643
644         p = a;
645         switch(TYPE(ch->qid)) {
646         default:
647                 error(Eperm);
648         case Qtopdir:
649         case Qprotodir:
650         case Qconvdir:
651                 return devdirread(ch, a, n, 0, 0, ipgen);
652         case Qarp:
653                 return arpread(f->arp, a, offset, n);
654         case Qbootp:
655                 return bootpread(a, offset, n);
656         case Qndb:
657                 return readstr(offset, a, n, f->ndb);
658         case Qiproute:
659                 return routeread(f, a, offset, n);
660         case Qiprouter:
661                 return iprouterread(f, a, n);
662         case Qipselftab:
663                 return ipselftabread(f, a, offset, n);
664         case Qlog:
665                 return netlogread(f, a, offset, n);
666         case Qctl:
667                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%lud", CONV(ch->qid));
668                 return readstr(offset, p, n, get_cur_genbuf());
669         case Qremote:
670                 buf = kzmalloc(Statelen, 0);
671                 x = f->p[PROTO(ch->qid)];
672                 c = x->conv[CONV(ch->qid)];
673                 if(x->remote == NULL) {
674                         snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
675                 } else {
676                         (*x->remote)(c, buf, Statelen-2);
677                 }
678                 rv = readstr(offset, p, n, buf);
679                 kfree(buf);
680                 return rv;
681         case Qlocal:
682                 buf = kzmalloc(Statelen, 0);
683                 x = f->p[PROTO(ch->qid)];
684                 c = x->conv[CONV(ch->qid)];
685                 if(x->local == NULL) {
686                         snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
687                 } else {
688                         (*x->local)(c, buf, Statelen-2);
689                 }
690                 rv = readstr(offset, p, n, buf);
691                 kfree(buf);
692                 return rv;
693         case Qstatus:
694                 buf = kzmalloc(Statelen, 0);
695                 x = f->p[PROTO(ch->qid)];
696                 c = x->conv[CONV(ch->qid)];
697                 (*x->state)(c, buf, Statelen-2);
698                 rv = readstr(offset, p, n, buf);
699                 kfree(buf);
700                 return rv;
701         case Qdata:
702                 c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
703                 return qread(c->rq, a, n);
704         case Qerr:
705                 c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
706                 return qread(c->eq, a, n);
707         case Qsnoop:
708                 c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
709                 return qread(c->sq, a, n);
710         case Qstats:
711                 x = f->p[PROTO(ch->qid)];
712                 if(x->stats == NULL)
713                         error("stats not implemented");
714                 buf = kzmalloc(Statelen, 0);
715                 (*x->stats)(x, buf, Statelen);
716                 rv = readstr(offset, p, n, buf);
717                 kfree(buf);
718                 return rv;
719         }
720 }
721
722 static struct block*
723 ipbread(struct chan* ch, long n, uint32_t offset)
724 {
725         struct conv *c;
726         struct Proto *x;
727         struct Fs *f;
728
729         switch(TYPE(ch->qid)){
730         case Qdata:
731                 f = ipfs[ch->dev];
732                 x = f->p[PROTO(ch->qid)];
733                 c = x->conv[CONV(ch->qid)];
734                 return qbread(c->rq, n);
735         default:
736                 return devbread(ch, n, offset);
737         }
738 }
739
740 /*
741  *  set local address to be that of the ifc closest to remote address
742  */
743 static void
744 setladdr(struct conv* c)
745 {
746         findlocalip(c->p->f, c->laddr, c->raddr);
747 }
748
749 /*
750  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
751  */
752 static char*
753 setluniqueport(struct conv* c, int lport)
754 {
755         struct Proto *p;
756         struct conv *xp;
757         int x;
758
759         p = c->p;
760
761         qlock(&p->qlock);
762         for(x = 0; x < p->nc; x++){
763                 xp = p->conv[x];
764                 if(xp == NULL)
765                         break;
766                 if(xp == c)
767                         continue;
768                 if((xp->state == Connected || xp->state == Announced)
769                 && xp->lport == lport
770                 && xp->rport == c->rport
771                 && ipcmp(xp->raddr, c->raddr) == 0
772                 && ipcmp(xp->laddr, c->laddr) == 0){
773                         qunlock(&p->qlock);
774                         return "address in use";
775                 }
776         }
777         c->lport = lport;
778         qunlock(&p->qlock);
779         return NULL;
780 }
781
782 /*
783  *  pick a local port and set it
784  */
785 static void
786 setlport(struct conv* c)
787 {
788         struct Proto *p;
789         uint16_t *pp;
790         int x, found;
791
792         p = c->p;
793         if(c->restricted)
794                 pp = &p->nextrport;
795         else
796                 pp = &p->nextport;
797         qlock(&p->qlock);
798         for(;;(*pp)++){
799                 /*
800                  * Fsproto initialises p->nextport to 0 and the restricted
801                  * ports (p->nextrport) to 600.
802                  * Restricted ports must lie between 600 and 1024.
803                  * For the initial condition or if the unrestricted port number
804                  * has wrapped round, select a random port between 5000 and 1<<15
805                  * to start at.
806                  */
807                 if(c->restricted){
808                         if(*pp >= 1024)
809                                 *pp = 600;
810                 }
811                 else while(*pp < 5000)
812                         *pp = nrand(1<<15);
813
814                 found = 0;
815                 for(x = 0; x < p->nc; x++){
816                         if(p->conv[x] == NULL)
817                                 break;
818                         if(p->conv[x]->lport == *pp){
819                                 found = 1;
820                                 break;
821                         }
822                 }
823                 if(!found)
824                         break;
825         }
826         c->lport = (*pp)++;
827         qunlock(&p->qlock);
828 }
829
830 /*
831  *  set a local address and port from a string of the form
832  *      [address!]port[!r]
833  */
834 static char*
835 setladdrport(struct conv* c, char* str, int announcing)
836 {
837         char *p;
838         char *rv;
839         uint16_t lport;
840         uint8_t addr[IPaddrlen];
841
842         rv = NULL;
843
844         /*
845          *  ignore restricted part if it exists.  it's
846          *  meaningless on local ports.
847          */
848         p = strchr(str, '!');
849         if(p != NULL){
850                 *p++ = 0;
851                 if(strcmp(p, "r") == 0)
852                         p = NULL;
853         }
854
855         c->lport = 0;
856         if(p == NULL){
857                 if(announcing)
858                         ipmove(c->laddr, IPnoaddr);
859                 else
860                         setladdr(c);
861                 p = str;
862         } else {
863                 if(strcmp(str, "*") == 0)
864                         ipmove(c->laddr, IPnoaddr);
865                 else {
866                         parseip(addr, str);
867                         if(ipforme(c->p->f, addr))
868                                 ipmove(c->laddr, addr);
869                         else
870                                 return "not a local IP address";
871                 }
872         }
873
874         /* one process can get all connections */
875         if(announcing && strcmp(p, "*") == 0){
876                 if(!iseve())
877                         error(Eperm);
878                 return setluniqueport(c, 0);
879         }
880
881         lport = atoi(p);
882         if(lport <= 0)
883                 setlport(c);
884         else
885                 rv = setluniqueport(c, lport);
886         return rv;
887 }
888
889 static char*
890 setraddrport(struct conv* c, char* str)
891 {
892         char *p;
893
894         p = strchr(str, '!');
895         if(p == NULL)
896                 return "malformed address";
897         *p++ = 0;
898         parseip(c->raddr, str);
899         c->rport = atoi(p);
900         p = strchr(p, '!');
901         if(p){
902                 if(strstr(p, "!r") != NULL)
903                         c->restricted = 1;
904         }
905         return NULL;
906 }
907
908 /*
909  *  called by protocol connect routine to set addresses
910  */
911 char*
912 Fsstdconnect(struct conv *c, char *argv[], int argc)
913 {
914         char *p;
915
916         switch(argc) {
917         default:
918                 return "bad args to connect";
919         case 2:
920                 p = setraddrport(c, argv[1]);
921                 if(p != NULL)
922                         return p;
923                 setladdr(c);
924                 setlport(c);
925                 break;
926         case 3:
927                 p = setraddrport(c, argv[1]);
928                 if(p != NULL)
929                         return p;
930                 p = setladdrport(c, argv[2], 0);
931                 if(p != NULL)
932                         return p;
933         }
934
935         if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
936                 memcmp(c->laddr, v4prefix, IPv4off) == 0)
937                 || ipcmp(c->raddr, IPnoaddr) == 0)
938                 c->ipversion = V4;
939         else
940                 c->ipversion = V6;
941
942         return NULL;
943 }
944 /*
945  *  initiate connection and sleep till its set up
946  */
947 static int
948 connected(void* a)
949 {
950         return ((struct conv*)a)->state == Connected;
951 }
952 static void
953 connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
954 {
955         ERRSTACK(2);
956         char *p;
957
958         if(c->state != 0)
959                 error(Econinuse);
960         c->state = Connecting;
961         c->cerr[0] = '\0';
962         if(x->connect == NULL)
963                 error("connect not supported");
964         p = x->connect(c, cb->f, cb->nf);
965         if(p != NULL)
966                 error(p);
967
968         qunlock(&c->qlock);
969         if(waserror()){
970                 qlock(&c->qlock);
971                 nexterror();
972         }
973         rendez_sleep(&c->cr, connected, c);
974         qlock(&c->qlock);
975         poperror();
976
977         if(c->cerr[0] != '\0')
978                 error(c->cerr);
979 }
980
981 /*
982  *  called by protocol announce routine to set addresses
983  */
984 char*
985 Fsstdannounce(struct conv* c, char* argv[], int argc)
986 {
987         memset(c->raddr, 0, sizeof(c->raddr));
988         c->rport = 0;
989         switch(argc){
990         default:
991                 return "bad args to announce";
992         case 2:
993                 return setladdrport(c, argv[1], 1);
994         }
995 }
996
997 /*
998  *  initiate announcement and sleep till its set up
999  */
1000 static int
1001 announced(void* a)
1002 {
1003         return ((struct conv*)a)->state == Announced;
1004 }
1005 static void
1006 announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1007 {
1008         ERRSTACK(2);
1009         char *p;
1010
1011         if(c->state != 0)
1012                 error(Econinuse);
1013         c->state = Announcing;
1014         c->cerr[0] = '\0';
1015         if(x->announce == NULL)
1016                 error("announce not supported");
1017         p = x->announce(c, cb->f, cb->nf);
1018         if(p != NULL)
1019                 error(p);
1020
1021         qunlock(&c->qlock);
1022         if(waserror()){
1023                 qlock(&c->qlock);
1024                 nexterror();
1025         }
1026         rendez_sleep(&c->cr, announced, c);
1027         qlock(&c->qlock);
1028         poperror();
1029
1030         if(c->cerr[0] != '\0')
1031                 error(c->cerr);
1032 }
1033
/*
 *  called by protocol bind routine to set addresses
 *
 *  With exactly two arguments the result of setladdrport(c, argv[1], 0)
 *  is returned; any other argument count yields a static error string.
 */
char*
Fsstdbind(struct conv* c, char* argv[], int argc)
{
	if (argc != 2)
		return "bad args to bind";

	return setladdrport(c, argv[1], 0);
}
1047
1048 static void
1049 bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1050 {
1051         char *p;
1052
1053         if(x->bind == NULL)
1054                 p = Fsstdbind(c, cb->f, cb->nf);
1055         else
1056                 p = x->bind(c, cb->f, cb->nf);
1057         if(p != NULL)
1058                 error(p);
1059 }
1060
1061 static void
1062 tosctlmsg(struct conv *c, struct cmdbuf *cb)
1063 {
1064         if(cb->nf < 2)
1065                 c->tos = 0;
1066         else
1067                 c->tos = atoi(cb->f[1]);
1068 }
1069
1070 static void
1071 ttlctlmsg(struct conv *c, struct cmdbuf *cb)
1072 {
1073         if(cb->nf < 2)
1074                 c->ttl = MAXTTL;
1075         else
1076                 c->ttl = atoi(cb->f[1]);
1077 }
1078
1079 static long
1080 ipwrite(struct chan* ch, void *v, long n, int64_t off)
1081 {
1082         ERRSTACK(2);
1083         struct conv *c;
1084         struct Proto *x;
1085         char *p;
1086         struct cmdbuf *cb;
1087         uint8_t ia[IPaddrlen], ma[IPaddrlen];
1088         struct Fs *f;
1089         char *a;
1090
1091         a = v;
1092         f = ipfs[ch->dev];
1093
1094         switch(TYPE(ch->qid)){
1095         default:
1096                 error(Eperm);
1097         case Qdata:
1098                 x = f->p[PROTO(ch->qid)];
1099                 c = x->conv[CONV(ch->qid)];
1100
1101                 if(c->wq == NULL)
1102                         error(Eperm);
1103
1104                 qwrite(c->wq, a, n);
1105                 break;
1106         case Qarp:
1107                 return arpwrite(f, a, n);
1108         case Qiproute:
1109                 return routewrite(f, ch, a, n);
1110         case Qlog:
1111                 netlogctl(f, a, n);
1112                 return n;
1113         case Qndb:
1114                 return ndbwrite(f, a, off, n);
1115         case Qctl:
1116                 x = f->p[PROTO(ch->qid)];
1117                 c = x->conv[CONV(ch->qid)];
1118                 cb = parsecmd(a, n);
1119
1120                 qlock(&c->qlock);
1121                 if(waserror()) {
1122                         qunlock(&c->qlock);
1123                         kfree(cb);
1124                         nexterror();
1125                 }
1126                 if(cb->nf < 1)
1127                         error("short control request");
1128                 if(strcmp(cb->f[0], "connect") == 0)
1129                         connectctlmsg(x, c, cb);
1130                 else if(strcmp(cb->f[0], "announce") == 0)
1131                         announcectlmsg(x, c, cb);
1132                 else if(strcmp(cb->f[0], "bind") == 0)
1133                         bindctlmsg(x, c, cb);
1134                 else if(strcmp(cb->f[0], "ttl") == 0)
1135                         ttlctlmsg(c, cb);
1136                 else if(strcmp(cb->f[0], "tos") == 0)
1137                         tosctlmsg(c, cb);
1138                 else if(strcmp(cb->f[0], "ignoreadvice") == 0)
1139                         c->ignoreadvice = 1;
1140                 else if(strcmp(cb->f[0], "addmulti") == 0){
1141                         if(cb->nf < 2)
1142                                 error("addmulti needs interface address");
1143                         if(cb->nf == 2){
1144                                 if(!ipismulticast(c->raddr))
1145                                         error("addmulti for a non multicast address");
1146                                 parseip(ia, cb->f[1]);
1147                                 ipifcaddmulti(c, c->raddr, ia);
1148                         } else {
1149                                 parseip(ma, cb->f[2]);
1150                                 if(!ipismulticast(ma))
1151                                         error("addmulti for a non multicast address");
1152                                 parseip(ia, cb->f[1]);
1153                                 ipifcaddmulti(c, ma, ia);
1154                         }
1155                 } else if(strcmp(cb->f[0], "remmulti") == 0){
1156                         if(cb->nf < 2)
1157                                 error("remmulti needs interface address");
1158                         if(!ipismulticast(c->raddr))
1159                                 error("remmulti for a non multicast address");
1160                         parseip(ia, cb->f[1]);
1161                         ipifcremmulti(c, c->raddr, ia);
1162                 } else if(x->ctl != NULL) {
1163                         p = x->ctl(c, cb->f, cb->nf);
1164                         if(p != NULL)
1165                                 error(p);
1166                 } else
1167                         error("unknown control request");
1168                 qunlock(&c->qlock);
1169                 kfree(cb);
1170                 poperror();
1171         }
1172         return n;
1173 }
1174
1175 static long
1176 ipbwrite(struct chan* ch, struct block* bp, uint32_t offset)
1177 {
1178         struct conv *c;
1179         struct Proto *x;
1180         struct Fs *f;
1181         int n;
1182
1183         switch(TYPE(ch->qid)){
1184         case Qdata:
1185                 f = ipfs[ch->dev];
1186                 x = f->p[PROTO(ch->qid)];
1187                 c = x->conv[CONV(ch->qid)];
1188
1189                 if(c->wq == NULL)
1190                         error(Eperm);
1191
1192                 if(bp->next)
1193                         bp = concatblock(bp);
1194                 n = BLEN(bp);
1195                 qbwrite(c->wq, bp);
1196                 return n;
1197         default:
1198                 return devbwrite(ch, bp, offset);
1199         }
1200 }
1201
1202 struct dev ipdevtab = {
1203         'I',
1204         "ip",
1205
1206         ipreset,
1207         devinit,
1208         devshutdown,
1209         ipattach,
1210         ipwalk,
1211         ipstat,
1212         ipopen,
1213         devcreate,
1214         ipclose,
1215         ipread,
1216         ipbread,
1217         ipwrite,
1218         ipbwrite,
1219         devremove,
1220         ipwstat,
1221 };
1222
1223 int
1224 Fsproto(struct Fs *f, struct Proto *p)
1225 {
1226         if(f->np >= Maxproto)
1227                 return -1;
1228
1229         p->f = f;
1230
1231         if(p->ipproto > 0){
1232                 if(f->t2p[p->ipproto] != NULL)
1233                         return -1;
1234                 f->t2p[p->ipproto] = p;
1235         }
1236
1237         p->qid.type = QTDIR;
1238         p->qid.path = QID(f->np, 0, Qprotodir);
1239         p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
1240         if(p->conv == NULL)
1241                 panic("Fsproto");
1242
1243         p->x = f->np;
1244         p->nextport = 0;
1245         p->nextrport = 600;
1246         f->p[f->np++] = p;
1247
1248         return 0;
1249 }
1250
1251 /*
1252  *  return true if this protocol is
1253  *  built in
1254  */
1255 int
1256 Fsbuiltinproto(struct Fs* f, uint8_t proto)
1257 {
1258         return f->t2p[proto] != NULL;
1259 }
1260
1261 /*
1262  *  called with protocol locked
1263  */
1264 struct conv*
1265 Fsprotoclone(struct Proto *p, char *user)
1266 {
1267         struct conv *c, **pp, **ep;
1268
1269 retry:
1270         c = NULL;
1271         ep = &p->conv[p->nc];
1272         for(pp = p->conv; pp < ep; pp++) {
1273                 c = *pp;
1274                 if(c == NULL){
1275                         c = kzmalloc(sizeof(struct conv), 0);
1276                         if(c == NULL)
1277                                 error(Enomem);
1278                         qlock(&c->qlock);
1279                         c->p = p;
1280                         c->x = pp - p->conv;
1281                         if(p->ptclsize != 0){
1282                                 c->ptcl = kzmalloc(p->ptclsize, 0);
1283                                 if(c->ptcl == NULL) {
1284                                         kfree(c);
1285                                         error(Enomem);
1286                                 }
1287                         }
1288                         *pp = c;
1289                         p->ac++;
1290                         c->eq = qopen(1024, Qmsg, 0, 0);
1291                         (*p->create)(c);
1292                         break;
1293                 }
1294                 if(canqlock(&c->qlock)){
1295                         /*
1296                          *  make sure both processes and protocol
1297                          *  are done with this Conv
1298                          */
1299                         if(c->inuse == 0 && (p->inuse == NULL || (*p->inuse)(c) == 0))
1300                                 break;
1301
1302                         qunlock(&c->qlock);
1303                 }
1304         }
1305         if(pp >= ep) {
1306                 if(p->gc != NULL && (*p->gc)(p))
1307                         goto retry;
1308                 return NULL;
1309         }
1310
1311         c->inuse = 1;
1312         kstrdup(&c->owner, user);
1313         c->perm = 0660;
1314         c->state = Idle;
1315         ipmove(c->laddr, IPnoaddr);
1316         ipmove(c->raddr, IPnoaddr);
1317         c->r = NULL;
1318         c->rgen = 0;
1319         c->lport = 0;
1320         c->rport = 0;
1321         c->restricted = 0;
1322         c->ttl = MAXTTL;
1323         c->tos = DFLTTOS;
1324         qreopen(c->rq);
1325         qreopen(c->wq);
1326         qreopen(c->eq);
1327
1328         qunlock(&c->qlock);
1329         return c;
1330 }
1331
1332 int
1333 Fsconnected(struct conv* c, char* msg)
1334 {
1335         if(msg != NULL && *msg != '\0')
1336                 strncpy(c->cerr, msg, sizeof(c->cerr));
1337
1338         switch(c->state){
1339
1340         case Announcing:
1341                 c->state = Announced;
1342                 break;
1343
1344         case Connecting:
1345                 c->state = Connected;
1346                 break;
1347         }
1348
1349         rendez_wakeup(&c->cr);
1350         return 0;
1351 }
1352
1353 struct Proto*
1354 Fsrcvpcol(struct Fs* f, uint8_t proto)
1355 {
1356         if(f->ipmux)
1357                 return f->ipmux;
1358         else
1359                 return f->t2p[proto];
1360 }
1361
1362 struct Proto*
1363 Fsrcvpcolx(struct Fs *f, uint8_t proto)
1364 {
1365         return f->t2p[proto];
1366 }
1367
1368 /*
1369  *  called with protocol locked
1370  */
1371 struct conv*
1372 Fsnewcall(struct conv *c, uint8_t *raddr, uint16_t rport, uint8_t *laddr,
1373           uint16_t lport, uint8_t version)
1374 {
1375         struct conv *nc;
1376         struct conv **l;
1377         int i;
1378
1379         qlock(&c->qlock);
1380         i = 0;
1381         for(l = &c->incall; *l; l = &(*l)->next)
1382                 i++;
1383         if(i >= Maxincall) {
1384                 qunlock(&c->qlock);
1385                 return NULL;
1386         }
1387
1388         /* find a free conversation */
1389         nc = Fsprotoclone(c->p, network);
1390         if(nc == NULL) {
1391                 qunlock(&c->qlock);
1392                 return NULL;
1393         }
1394         ipmove(nc->raddr, raddr);
1395         nc->rport = rport;
1396         ipmove(nc->laddr, laddr);
1397         nc->lport = lport;
1398         nc->next = NULL;
1399         *l = nc;
1400         nc->state = Connected;
1401         nc->ipversion = version;
1402
1403         qunlock(&c->qlock);
1404
1405         rendez_wakeup(&c->listenr);
1406
1407         return nc;
1408 }
1409
1410 static long
1411 ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
1412 {
1413         if(off > strlen(f->ndb))
1414                 error(Eio);
1415         if(off+n >= sizeof(f->ndb)-1)
1416                 error(Eio);
1417         memmove(f->ndb+off, a, n);
1418         f->ndb[off+n] = 0;
1419         f->ndbvers++;
1420         f->ndbmtime = seconds();
1421         return n;
1422 }
1423
1424 uint32_t
1425 scalednconv(void)
1426 {
1427         //if(conf.npage*BY2PG >= 128*MB)
1428                 return Nchans*4;
1429                 //      return Nchans;
1430 }