Adds chaninfo()
[akaros.git] / kern / src / net / ipv6.c
1 // INFERNO
2 #include <vfs.h>
3 #include <kfs.h>
4 #include <slab.h>
5 #include <kmalloc.h>
6 #include <kref.h>
7 #include <string.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include <error.h>
11 #include <cpio.h>
12 #include <pmap.h>
13 #include <smp.h>
14 #include <ip.h>
15
16 #include <vfs.h>
17 #include <kfs.h>
18 #include <slab.h>
19 #include <kmalloc.h>
20 #include <kref.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <assert.h>
24 #include <error.h>
25 #include <cpio.h>
26 #include <pmap.h>
27 #include <smp.h>
28 #include <ip.h>
29
/* Header sizes, IPv4 fragmentation flags and the packet size ceiling. */
enum
{
	/* fixed header sizes, in bytes */
	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
	IP6FHDR		= 8,		/* sizeof(Fraghdr6) */

	IP_HLEN4	= 5,		/* IPv4 header length in words */

	/* IPv4 fragment flag bits */
	IP_DF		= 1 << 14,	/* Don't fragment */
	IP_MF		= 1 << 13,	/* More fragments */

	IP_MAX		= 32 << 10,	/* Maximum Internet packet size */
};
40
/*
 * Traffic class of an IPv6 header.  The low nibble of vcf[0] carries the
 * upper four bits of the class and the high nibble of vcf[1] the lower four;
 * this is the inverse of how ipoput6() stores tos (tos >> 4 and tos << 4).
 * hdr is evaluated twice, so it must not have side effects.
 */
#define IPV6CLASS(hdr)	((((hdr)->vcf[0] & 0x0F) << 4) | (((hdr)->vcf[1] & 0xF0) >> 4))
/* Version nibble of the packet at xp->rp, left in the high four bits. */
#define BLKIPVER(xp)	(((struct ip6hdr*)((xp)->rp))->vcf[0] & 0xF0)
/* Atomically add one to x and yield the incremented value. */
#define NEXT_ID(x)	(__sync_add_and_fetch(&(x), 1))
/*
 * This sleazy macro is stolen shamelessly from ip.c, see comment there.
 *
 * It treats the very start of a block's buffer (xp->base) as a
 * struct Ipfrag, so the fragment offset and length are kept inside the
 * block itself.  ip6reassemble() pads the block first when there is not
 * enough room between base and rp.
 */
#define BKFG(xp)        ((struct Ipfrag*)((xp)->base))
/* Forward declarations for the reassembly and extension-header helpers. */
struct fragment6;

struct block *ip6reassemble(struct IP *ip, int uflen, struct block *bp,
			    struct ip6hdr *ih);
void ipfragfree6(struct IP *ip, struct fragment6 *frag);
struct fragment6 *ipfragallo6(struct IP *ip);
static struct block *procxtns(struct IP *ip, struct block *bp, int doreasm);
int unfraglen(struct block *bp, uint8_t *nexthdr, int setfh);
struct block *procopts(struct block *bp);

57
/*
 * MIB II counters.  Each enumerator is an index into struct IP's stats[]
 * array and into statnames[] below; Nstats is the number of counters.
 */
enum
{
	Forwarding,
	DefaultTTL,
	InReceives,
	InHdrErrors,
	InAddrErrors,
	ForwDatagrams,
	InUnknownProtos,
	InDiscards,
	InDelivers,
	OutRequests,
	OutDiscards,
	OutNoRoutes,
	ReasmTimeout,
	ReasmReqds,
	ReasmOKs,
	ReasmFails,
	FragOKs,
	FragFails,
	FragCreates,

	Nstats,
};

/*
 * Printable name of every counter, indexed by the enum above.  Written with
 * standard designated initializers ("[index] = value") rather than the
 * obsolete form without '='.
 */
static char *statnames[] =
{
	[Forwarding]		= "Forwarding",
	[DefaultTTL]		= "DefaultTTL",
	[InReceives]		= "InReceives",
	[InHdrErrors]		= "InHdrErrors",
	[InAddrErrors]		= "InAddrErrors",
	[ForwDatagrams]		= "ForwDatagrams",
	[InUnknownProtos]	= "InUnknownProtos",
	[InDiscards]		= "InDiscards",
	[InDelivers]		= "InDelivers",
	[OutRequests]		= "OutRequests",
	[OutDiscards]		= "OutDiscards",
	[OutNoRoutes]		= "OutNoRoutes",
	[ReasmTimeout]		= "ReasmTimeout",
	[ReasmReqds]		= "ReasmReqds",
	[ReasmOKs]		= "ReasmOKs",
	[ReasmFails]		= "ReasmFails",
	[FragOKs]		= "FragOKs",
	[FragFails]		= "FragFails",
	[FragCreates]		= "FragCreates",
};
106
/*
 * One IPv4 reassembly queue.  The tag is spelled fragment4 so that this
 * definition completes the type named by the next pointer below and by the
 * flisthead4/fragfree4 pointers in struct IP.
 */
struct fragment4
{
	struct block*	blist;		/* blocks belonging to this queue */
	struct fragment4*	next;	/* next queue on the list */
	uint32_t	src;		/* IPv4 source address */
	uint32_t	dst;		/* IPv4 destination address */
	uint16_t	id;		/* datagram identification */
	uint32_t	age;
};
116
/*
 * One IPv6 reassembly queue: the fragments received so far for a single
 * (src, dst, id) triple.  Queues live either on struct IP's flisthead6
 * (in use) or fragfree6 (free) list, chained through next.
 */
struct fragment6
{
        struct block*   blist;          /* queued fragments; ip6reassemble() keeps them ordered by BKFG()->foff */
        struct fragment6*       next;   /* next queue on the active or free list */
        uint8_t         src[IPaddrlen]; /* source address of the fragments */
        uint8_t         dst[IPaddrlen]; /* destination address of the fragments */
        unsigned int    id;             /* identification from the fragment header */
        uint32_t        age;            /* set to NOW + 30000 by ipfragallo6(); queue is dropped once age < NOW */
};
126
/*
 * Per-fragment bookkeeping that BKFG() overlays on the start of a block's
 * buffer while the block waits on a reassembly queue.
 */
struct Ipfrag
{
        uint16_t        foff;   /* offset of this fragment's data within the packet */
        uint16_t        flen;   /* bytes of fragment data still counted for this block */
};
132
/*
 * An instance of IP: the MIB counters plus the fragment bookkeeping for
 * both address families.
 */
struct IP
{
        uint32_t                stats[Nstats];  /* MIB II counters, indexed by the counter enum */

        /* IPv4 fragment state; not touched by the code visible in this file */
        qlock_t         fraglock4;
        struct fragment4*       flisthead4;
        struct fragment4*       fragfree4;
        int             id4;

        qlock_t         fraglock6;              /* held by ip6reassemble() around all queue manipulation */
        struct fragment6*       flisthead6;     /* reassembly queues in use, newest first */
        struct fragment6*       fragfree6;      /* unused reassembly queues */
        int             id6;                    /* bumped with NEXT_ID() for each packet ipoput6() fragments */

        int             iprouting;      /* true if we route like a gateway */
};
150
151 int
152 ipoput6(struct Fs *f,
153         struct block *bp, int gating, int ttl, int tos, struct conv *c)
154 {
155         ERRSTACK(1);
156         int tentative;
157         struct Ipifc *ifc;
158         uint8_t *gate, nexthdr;
159         struct ip6hdr *eh;
160         int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
161         struct route *r, *sr;
162         struct fraghdr6 fraghdr;
163         struct block *xp, *nb;
164         struct IP *ip;
165         int rv = 0;
166
167         ip = f->ip;
168
169         /* Fill out the ip header */
170         eh = (struct ip6hdr*)(bp->rp);
171
172         ip->stats[OutRequests]++;
173
174         /* Number of uint8_ts in data and ip header to write */
175         len = blocklen(bp);
176         
177         tentative = iptentative(f, eh->src);
178         if(tentative){
179                 netlog(f, Logip, "reject tx of packet with tentative src address\n");
180                 goto free;
181         }
182
183         if(gating){
184                 chunk = nhgets(eh->ploadlen);
185                 if(chunk > len){
186                         ip->stats[OutDiscards]++;
187                         netlog(f, Logip, "short gated packet\n");
188                         goto free;
189                 }
190                 if(chunk + IPV6HDR_LEN < len)
191                         len = chunk + IPV6HDR_LEN;
192         }
193
194         if(len >= IP_MAX){
195 //              print("len > IP_MAX, free\n");
196                 ip->stats[OutDiscards]++;
197                 netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
198                 goto free;
199         }
200
201         r = v6lookup(f, eh->dst, c);
202         if(r == NULL){
203 //              print("no route for %I, src %I free\n", eh->dst, eh->src);
204                 ip->stats[OutNoRoutes]++;
205                 netlog(f, Logip, "no interface %I\n", eh->dst);
206                 rv = -1;
207                 goto free;
208         }
209
210         ifc = r->rt.ifc;
211         if(r->rt.type & (Rifc|Runi))
212                 gate = eh->dst;
213         else
214         if(r->rt.type & (Rbcast|Rmulti)) {
215                 gate = eh->dst;
216                 sr = v6lookup(f, eh->src, NULL);
217                 if(sr != NULL && (sr->rt.type & Runi))
218                         ifc = sr->rt.ifc;
219         }
220         else
221                 gate = r->v6.gate;
222
223         if(!gating)
224                 eh->vcf[0] = IP_VER6;
225         eh->ttl = ttl;
226         if(!gating) {
227                 eh->vcf[0] |= (tos >> 4);
228                 eh->vcf[1] = (tos << 4);
229         }
230
231         if(!canrlock(&ifc->rwlock)) {
232                 goto free;
233         }
234
235         if(waserror()){
236                 runlock(&ifc->rwlock);
237                 nexterror();
238         }
239
240         if(ifc->m == NULL) {
241                 goto raise;
242         }
243
244         /* If we dont need to fragment just send it */
245         medialen = ifc->maxtu - ifc->m->hsize;
246         if(len <= medialen) {
247                 hnputs(eh->ploadlen, len-IPV6HDR_LEN);
248                 ifc->m->bwrite(ifc, bp, V6, gate);
249                 runlock(&ifc->rwlock);
250                 poperror();
251                 return 0;
252         }
253
254         if(gating) 
255         if(ifc->reassemble <= 0) {
256
257                 /* v6 intermediate nodes are not supposed to fragment pkts;
258                    we fragment if ifc->reassemble is turned on; an exception
259                    needed for nat.
260                  */
261
262                 ip->stats[OutDiscards]++;
263                 icmppkttoobig6(f, ifc, bp);
264                 netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
265                 goto raise;
266         }
267                 
268         /* start v6 fragmentation */
269         uflen = unfraglen(bp, &nexthdr, 1);
270         if(uflen > medialen) {
271                 ip->stats[FragFails]++;
272                 ip->stats[OutDiscards]++;
273                 netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
274                 goto raise;
275         }
276
277         flen = len - uflen;
278         seglen = (medialen - (uflen + IP6FHDR)) & ~7;
279         if(seglen < 8) {
280                 ip->stats[FragFails]++;
281                 ip->stats[OutDiscards]++;
282                 netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
283                 goto raise;
284         }
285
286         lid = NEXT_ID(ip->id6);
287         fraghdr.nexthdr = nexthdr;
288         fraghdr.res = 0;
289         hnputl(fraghdr.id, lid);
290
291         xp = bp;
292         offset = uflen;
293         while (xp != NULL && offset && offset >= BLEN(xp)) {
294                 offset -= BLEN(xp);
295                 xp = xp->next;
296         }
297         xp->rp += offset;
298
299         fragoff = 0; 
300         morefrags = 1;
301
302         for(; fragoff < flen; fragoff += seglen) {
303                 nb = allocb(uflen + IP6FHDR + seglen);
304
305                 if(fragoff + seglen >= flen) {
306                         seglen = flen - fragoff;
307                         morefrags = 0;
308                 }
309
310                 hnputs(eh->ploadlen, seglen+IP6FHDR);
311                 memmove(nb->wp, eh, uflen);
312                 nb->wp += uflen;
313
314                 hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
315                 fraghdr.offsetRM[1] |= morefrags;
316                 memmove(nb->wp, &fraghdr, IP6FHDR);
317                 nb->wp += IP6FHDR;
318
319                 /* Copy data */
320                 chunk = seglen;
321                 while (chunk) {
322                         if(!xp) {
323                                 ip->stats[OutDiscards]++;
324                                 ip->stats[FragFails]++;
325                                 freeblist(nb);
326                                 netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
327                                 goto raise;
328                         }
329                         blklen = chunk;
330                         if(BLEN(xp) < chunk)
331                                 blklen = BLEN(xp);
332                         memmove(nb->wp, xp->rp, blklen);
333
334                         nb->wp += blklen;
335                         xp->rp += blklen;
336                         chunk -= blklen;
337                         if(xp->rp == xp->wp)
338                                 xp = xp->next; 
339                 }
340
341                 ifc->m->bwrite(ifc, nb, V6, gate);
342                 ip->stats[FragCreates]++;
343         }
344         ip->stats[FragOKs]++;
345
346 raise:
347         runlock(&ifc->rwlock);
348         poperror();
349 free:
350         freeblist(bp);  
351         return rv;
352 }
353
354 void
355 ipiput6(struct Fs *f, struct Ipifc *ifc, struct block *bp)
356 {
357         int hl;
358         int hop, tos;
359         uint8_t proto;
360         struct ip6hdr *h;
361         struct Proto *p;
362         int notforme;
363         int tentative;
364         uint8_t v6dst[IPaddrlen];
365         struct IP *ip;
366         struct route *r, *sr;
367
368         ip = f->ip;
369         ip->stats[InReceives]++;
370
371         /*
372          *  Ensure we have all the header info in the first
373          *  block.  Make life easier for other protocols by
374          *  collecting up to the first 64 bytes in the first block.
375          */
376         if(BLEN(bp) < 64) {
377                 hl = blocklen(bp);
378                 if(hl < IP6HDR)
379                         hl = IP6HDR;
380                 if(hl > 64)
381                         hl = 64;
382                 bp = pullupblock(bp, hl);
383                 if(bp == NULL)
384                         return;
385         }
386
387         h = (struct ip6hdr *)(bp->rp);
388
389         memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
390         notforme = ipforme(f, v6dst) == 0;
391         tentative = iptentative(f, v6dst);
392   
393         if(tentative && (h->proto != ICMPv6)) {
394                 printd("tentative addr, drop\n");
395                 freeblist(bp);
396                 return;
397         }
398
399         /* Check header version */
400         if(BLKIPVER(bp) != IP_VER6) {
401                 ip->stats[InHdrErrors]++;
402                 netlog(f, Logip, "ip: bad version 0x%x\n", (h->vcf[0]&0xF0)>>2);
403                 freeblist(bp);
404                 return;
405         }
406
407         /* route */
408         if(notforme) {
409                 if(!ip->iprouting){
410                         freeb(bp);
411                         return;
412                 }
413                 /* don't forward to source's network */
414                 sr = v6lookup(f, h->src, NULL);
415                 r = v6lookup(f, h->dst, NULL);
416
417                 if(r == NULL || sr == r){
418                         ip->stats[OutDiscards]++;
419                         freeblist(bp);
420                         return;
421                 }
422
423                 /* don't forward if packet has timed out */
424                 hop = h->ttl;
425                 if(hop < 1) {
426                         ip->stats[InHdrErrors]++;
427                         icmpttlexceeded6(f, ifc, bp);
428                         freeblist(bp);
429                         return;
430                 }
431
432                 /* process headers & reassemble if the interface expects it */
433                 bp = procxtns(ip, bp, r->rt.ifc->reassemble);
434
435                 if(bp == NULL)
436                         return;
437
438                 ip->stats[ForwDatagrams]++;
439                 h = (struct ip6hdr *) (bp->rp);
440                 tos = IPV6CLASS(h);
441                 hop = h->ttl;
442                 ipoput6(f, bp, 1, hop-1, tos, NULL);
443                 return;
444         }
445
446         /* reassemble & process headers if needed */
447         bp = procxtns(ip, bp, 1);
448
449         if(bp == NULL)
450                 return;
451
452         h = (struct ip6hdr *) (bp->rp);
453         proto = h->proto;
454         p = Fsrcvpcol(f, proto);
455         if(p != NULL && p->rcv != NULL) {
456                 ip->stats[InDelivers]++;
457                 (*p->rcv)(p, ifc, bp);
458                 return;
459         }
460
461         ip->stats[InDiscards]++;
462         ip->stats[InUnknownProtos]++;
463         freeblist(bp);
464 }
465
466 /*
467  * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
468  */
469 void
470 ipfragfree6(struct IP *ip, struct fragment6 *frag)
471 {
472         struct fragment6 *fl, **l;
473
474         if(frag->blist)
475                 freeblist(frag->blist);
476
477         memset(frag->src, 0, IPaddrlen);
478         frag->id = 0;
479         frag->blist = NULL;
480
481         l = &ip->flisthead6;
482         for(fl = *l; fl; fl = fl->next) {
483                 if(fl == frag) {
484                         *l = frag->next;
485                         break;
486                 }
487                 l = &fl->next;
488         }
489
490         frag->next = ip->fragfree6;
491         ip->fragfree6 = frag;
492
493 }
494
495 /*
496  * ipfragallo6 - copied from ipfragalloc4
497  */
498 struct fragment6*
499 ipfragallo6(struct IP *ip)
500 {
501         struct fragment6 *f;
502
503         while(ip->fragfree6 == NULL) {
504                 /* free last entry on fraglist */
505                 for(f = ip->flisthead6; f->next; f = f->next)
506                         ;
507                 ipfragfree6(ip, f);
508         }
509         f = ip->fragfree6;
510         ip->fragfree6 = f->next;
511         f->next = ip->flisthead6;
512         ip->flisthead6 = f;
513         f->age = NOW + 30000;
514
515         return f;
516 }
517
518 static struct block*
519 procxtns(struct IP *ip, struct block *bp, int doreasm) {
520
521         int offset;
522         uint8_t proto;
523         struct ip6hdr *h;
524
525         h = (struct ip6hdr *) (bp->rp);
526         offset = unfraglen(bp, &proto, 0);
527
528         if((proto == FH) && (doreasm != 0)) {
529                 bp = ip6reassemble(ip, offset, bp, h);
530                 if(bp == NULL) 
531                         return NULL; 
532                 offset = unfraglen(bp, &proto, 0);
533         }
534
535         if(proto == DOH || offset > IP6HDR) 
536                 bp = procopts(bp);
537
538         return bp;
539 }
540
541
542 /*      returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
543  *      hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
544  *      of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
545  *      field of the last header in the "Unfragmentable part" is set to FH.
546  */
547 int
548 unfraglen(struct block *bp, uint8_t *nexthdr, int setfh)
549 {
550         uint8_t *p, *q;
551         int ufl, hs;
552
553         p = bp->rp;
554         q = p+6;        /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
555         *nexthdr = *q;
556         ufl = IP6HDR;
557         p += ufl;
558
559         for(;;) {
560                 if(*nexthdr == HBH || *nexthdr == RH) {
561                         *nexthdr = *p;
562                         hs = ((int)*(p+1) + 1) * 8;
563                         ufl += hs;
564                         q = p;
565                         p += hs;
566                 }
567                 else
568                         break;
569         }
570
571         if(*nexthdr == FH)
572                 *q = *p;
573
574         if(setfh)
575                 *q = FH;
576
577         return ufl;
578 }
579
/*
 * procopts - placeholder for extension-header option processing; it
 * currently returns bp unchanged.
 */
struct block*
procopts(struct block *bp)
{
        return bp;
}
585
586 struct block*
587 ip6reassemble(struct IP* ip, int uflen, struct block* bp, struct ip6hdr* ih)
588 {
589
590         int fend, offset;
591         unsigned int id;
592         struct fragment6 *f, *fnext;
593         struct fraghdr6 *fraghdr;
594         uint8_t src[IPaddrlen], dst[IPaddrlen];
595         struct block *bl, **l, *last, *prev;
596         int ovlap, len, fragsize, pktposn;
597
598         fraghdr = (struct fraghdr6 *) (bp->rp + uflen);
599         memmove(src, ih->src, IPaddrlen);
600         memmove(dst, ih->dst, IPaddrlen);
601         id = nhgetl(fraghdr->id);
602         offset = nhgets(fraghdr->offsetRM) & ~7;
603
604         /*
605          *  block lists are too hard, pullupblock into a single block
606          */
607         if(bp->next){
608                 bp = pullupblock(bp, blocklen(bp));
609                 ih = (struct ip6hdr *)(bp->rp);
610         }
611
612
613         qlock(&ip->fraglock6);
614
615         /*
616          *  find a reassembly queue for this fragment
617          */
618         for(f = ip->flisthead6; f; f = fnext){
619                 fnext = f->next;
620                 if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
621                         break;
622                 if(f->age < NOW){
623                         ip->stats[ReasmTimeout]++;
624                         ipfragfree6(ip, f);
625                 }
626         }
627
628
629         /*
630          *  if this isn't a fragmented packet, accept it
631          *  and get rid of any fragments that might go
632          *  with it.
633          */
634         if(nhgets(fraghdr->offsetRM)==0) {      // first frag is also the last
635                 if(f != NULL) {
636                         ipfragfree6(ip, f);
637                         ip->stats[ReasmFails]++;
638                 }
639                 qunlock(&ip->fraglock6);
640                 return bp;
641         }
642
643         if(bp->base+sizeof(struct Ipfrag) >= bp->rp){
644                 bp = padblock(bp, sizeof(struct Ipfrag));
645                 bp->rp += sizeof(struct Ipfrag);
646         }
647
648         BKFG(bp)->foff = offset;
649         BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
650
651         /* First fragment allocates a reassembly queue */
652         if(f == NULL) {
653                 f = ipfragallo6(ip);
654                 f->id = id;
655                 memmove(f->src, src, IPaddrlen);
656                 memmove(f->dst, dst, IPaddrlen);
657
658                 f->blist = bp;
659
660                 qunlock(&ip->fraglock6);
661                 ip->stats[ReasmReqds]++;
662                 return NULL;
663         }
664
665         /*
666          *  find the new fragment's position in the queue
667          */
668         prev = NULL;
669         l = &f->blist;
670         bl = f->blist;
671         while(bl != NULL && BKFG(bp)->foff > BKFG(bl)->foff) {
672                 prev = bl;
673                 l = &bl->next;
674                 bl = bl->next;
675         }
676
677         /* Check overlap of a previous fragment - trim away as necessary */
678         if(prev) {
679                 ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
680                 if(ovlap > 0) {
681                         if(ovlap >= BKFG(bp)->flen) {
682                                 freeblist(bp);
683                                 qunlock(&ip->fraglock6);
684                                 return NULL;
685                         }
686                         BKFG(prev)->flen -= ovlap;
687                 }
688         }
689
690         /* Link onto assembly queue */
691         bp->next = *l;
692         *l = bp;
693
694         /* Check to see if succeeding segments overlap */
695         if(bp->next) {
696                 l = &bp->next;
697                 fend = BKFG(bp)->foff + BKFG(bp)->flen;
698
699                 /* Take completely covered segments out */
700
701                 while(*l) {
702                         ovlap = fend - BKFG(*l)->foff;
703
704                         if(ovlap <= 0) 
705                                 break; 
706                         if(ovlap < BKFG(*l)->flen) {
707                                 BKFG(*l)->flen -= ovlap;
708                                 BKFG(*l)->foff += ovlap;
709                                 /* move up ih hdrs */
710                                 memmove((*l)->rp + ovlap, (*l)->rp, uflen);
711                                 (*l)->rp += ovlap;
712                                 break;
713                         }
714                         last = (*l)->next;
715                         (*l)->next = NULL;
716                         freeblist(*l);
717                         *l = last;
718                 }
719         }
720
721         /*
722          *  look for a complete packet.  if we get to a fragment
723          *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
724          */
725         pktposn = 0;
726         for(bl = f->blist; bl; bl = bl->next) {
727                 if(BKFG(bl)->foff != pktposn)
728                         break;
729         
730                 fraghdr = (struct fraghdr6 *) (bl->rp + uflen);
731                 if((fraghdr->offsetRM[1] & 1) == 0) {
732                         bl = f->blist;
733
734                         /* get rid of frag header in first fragment */
735
736                         memmove(bl->rp + IP6FHDR, bl->rp, uflen);
737                         bl->rp += IP6FHDR;
738                         len = nhgets(((struct ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
739                         bl->wp = bl->rp + len + IP6HDR;
740
741                         /* Pullup all the fragment headers and
742                          * return a complete packet
743                          */
744                         for(bl = bl->next; bl; bl = bl->next) {
745                                 fragsize = BKFG(bl)->flen;
746                                 len += fragsize;
747                                 bl->rp += uflen + IP6FHDR;
748                                 bl->wp = bl->rp + fragsize;
749                         }
750
751                         bl = f->blist;
752                         f->blist = NULL;
753                         ipfragfree6(ip, f);
754                         ih = (struct ip6hdr*)(bl->rp);
755                         hnputs(ih->ploadlen, len);
756                         qunlock(&ip->fraglock6);
757                         ip->stats[ReasmOKs]++;
758                         return bl;              
759                 }
760                 pktposn += BKFG(bl)->flen;
761         }
762         qunlock(&ip->fraglock6);
763         return NULL;
764 }
765