Netaux functions and devmnt.
[akaros.git] / kern / src / net / ipv6.c
1 #include        "u.h"
2 #include        "../port/lib.h"
3 #include        "mem.h"
4 #include        "dat.h"
5 #include        "fns.h"
6 #include        "../port/error.h"
7
8 #include        "ip.h"
9 #include        "ipv6.h"
10
/*
 * Header sizes, IPv4 flag bits and size limits shared by the
 * output, input and reassembly code in this file.
 */
enum
{
	/* fixed header sizes, in bytes */
	IP4HDR		= 20,		/* sizeof(Ip4hdr) */
	IP6HDR		= 40,		/* sizeof(Ip6hdr) */
	IP6FHDR		= 8,		/* sizeof(Fraghdr6) */

	/* IPv4 header fields */
	IP_HLEN4	= 0x05,		/* Header length in words */
	IP_DF		= 0x4000,	/* Don't fragment */
	IP_MF		= 0x2000,	/* More fragments */

	/* limits */
	IP_MAX		= 32 * 1024,	/* Maximum Internet packet size */
};
21
/*
 * Traffic class of an IPv6 header: the 8 bits that follow the 4-bit
 * version, i.e. the low nibble of vcf[0] (class bits 7..4) followed by
 * the high nibble of vcf[1] (class bits 3..0).  This is the inverse of
 * how ipoput6 stores tos into vcf[0]/vcf[1].
 */
#define IPV6CLASS(hdr)	((((hdr)->vcf[0]&0x0F)<<4) | (((hdr)->vcf[1]&0xF0)>>4))
/*
 * Version nibble (left in the high 4 bits, not shifted down) of the
 * header found at the block's read pointer.
 */
#define BLKIPVER(xp)	(((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
/*
 * This sleazy macro is stolen shamelessly from ip.c, see comment there.
 * It views the very start of a block's buffer (base) as an Ipfrag record.
 */
#define BKFG(xp)	((Ipfrag*)((xp)->base))
28
29 typedef struct  IP      IP;
30 typedef struct  Fragment4       Fragment4;
31 typedef struct  Fragment6       Fragment6;
32 typedef struct  Ipfrag  Ipfrag;
33
34 Block*          ip6reassemble(IP*, int, Block*, Ip6hdr*);
35 void            ipfragfree6(IP*, Fragment6*);
36 Fragment6*      ipfragallo6(IP*);
37 static Block*           procxtns(IP *ip, Block *bp, int doreasm);
38 int             unfraglen(Block *bp, uchar *nexthdr, int setfh);
39 Block*          procopts(Block *bp);
40
/* MIB II counters; each value indexes IP.stats[] and statnames[] */
enum
{
	Forwarding,
	DefaultTTL,
	InReceives,
	InHdrErrors,
	InAddrErrors,
	ForwDatagrams,
	InUnknownProtos,
	InDiscards,
	InDelivers,
	OutRequests,
	OutDiscards,
	OutNoRoutes,
	ReasmTimeout,
	ReasmReqds,
	ReasmOKs,
	ReasmFails,
	FragOKs,
	FragFails,
	FragCreates,

	Nstats,		/* number of counters, not a counter itself */
};

/*
 * Printable name of every counter, indexed by the enum above.
 * Written with standard C99 designated initializers ("[index] = value")
 * rather than the older form without '='.
 */
static char *statnames[] =
{
	[Forwarding]		= "Forwarding",
	[DefaultTTL]		= "DefaultTTL",
	[InReceives]		= "InReceives",
	[InHdrErrors]		= "InHdrErrors",
	[InAddrErrors]		= "InAddrErrors",
	[ForwDatagrams]		= "ForwDatagrams",
	[InUnknownProtos]	= "InUnknownProtos",
	[InDiscards]		= "InDiscards",
	[InDelivers]		= "InDelivers",
	[OutRequests]		= "OutRequests",
	[OutDiscards]		= "OutDiscards",
	[OutNoRoutes]		= "OutNoRoutes",
	[ReasmTimeout]		= "ReasmTimeout",
	[ReasmReqds]		= "ReasmReqds",
	[ReasmOKs]		= "ReasmOKs",
	[ReasmFails]		= "ReasmFails",
	[FragOKs]		= "FragOKs",
	[FragFails]		= "FragFails",
	[FragCreates]		= "FragCreates",
};
89
90 struct Fragment4
91 {
92         Block*  blist;
93         Fragment4*      next;
94         ulong   src;
95         ulong   dst;
96         ushort  id;
97         ulong   age;
98 };
99
100 struct Fragment6
101 {
102         Block*  blist;
103         Fragment6*      next;
104         uchar   src[IPaddrlen];
105         uchar   dst[IPaddrlen];
106         uint    id;
107         ulong   age;
108 };
109
110 struct Ipfrag
111 {
112         ushort  foff;
113         ushort  flen;
114 };
115
116 /* an instance of IP */
117 struct IP
118 {
119         ulong           stats[Nstats];
120
121         QLock           fraglock4;
122         Fragment4*      flisthead4;
123         Fragment4*      fragfree4;
124         Ref             id4;
125
126         QLock           fraglock6;
127         Fragment6*      flisthead6;
128         Fragment6*      fragfree6;
129         Ref             id6;
130
131         int             iprouting;      /* true if we route like a gateway */
132 };
133
134 int
135 ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
136 {
137         int tentative;
138         Ipifc *ifc;
139         uchar *gate, nexthdr;
140         Ip6hdr *eh;
141         int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
142         Route *r, *sr;
143         Fraghdr6 fraghdr;
144         Block *xp, *nb;
145         IP *ip;
146         int rv = 0;
147
148         ip = f->ip;
149
150         /* Fill out the ip header */
151         eh = (Ip6hdr*)(bp->rp);
152
153         ip->stats[OutRequests]++;
154
155         /* Number of uchars in data and ip header to write */
156         len = blocklen(bp);
157         
158         tentative = iptentative(f, eh->src);
159         if(tentative){
160                 netlog(f, Logip, "reject tx of packet with tentative src address\n");
161                 goto free;
162         }
163
164         if(gating){
165                 chunk = nhgets(eh->ploadlen);
166                 if(chunk > len){
167                         ip->stats[OutDiscards]++;
168                         netlog(f, Logip, "short gated packet\n");
169                         goto free;
170                 }
171                 if(chunk + IPV6HDR_LEN < len)
172                         len = chunk + IPV6HDR_LEN;
173         }
174
175         if(len >= IP_MAX){
176 //              print("len > IP_MAX, free\n");
177                 ip->stats[OutDiscards]++;
178                 netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
179                 goto free;
180         }
181
182         r = v6lookup(f, eh->dst, c);
183         if(r == nil){
184 //              print("no route for %I, src %I free\n", eh->dst, eh->src);
185                 ip->stats[OutNoRoutes]++;
186                 netlog(f, Logip, "no interface %I\n", eh->dst);
187                 rv = -1;
188                 goto free;
189         }
190
191         ifc = r->ifc;
192         if(r->type & (Rifc|Runi))
193                 gate = eh->dst;
194         else
195         if(r->type & (Rbcast|Rmulti)) {
196                 gate = eh->dst;
197                 sr = v6lookup(f, eh->src, nil);
198                 if(sr != nil && (sr->type & Runi))
199                         ifc = sr->ifc;
200         }
201         else
202                 gate = r->v6.gate;
203
204         if(!gating)
205                 eh->vcf[0] = IP_VER6;
206         eh->ttl = ttl;
207         if(!gating) {
208                 eh->vcf[0] |= (tos >> 4);
209                 eh->vcf[1] = (tos << 4);
210         }
211
212         if(!canrlock(ifc)) {
213                 goto free;
214         }
215
216         if(waserror()){
217                 runlock(ifc);
218                 nexterror();
219         }
220
221         if(ifc->m == nil) {
222                 goto raise;
223         }
224
225         /* If we dont need to fragment just send it */
226         medialen = ifc->maxtu - ifc->m->hsize;
227         if(len <= medialen) {
228                 hnputs(eh->ploadlen, len-IPV6HDR_LEN);
229                 ifc->m->bwrite(ifc, bp, V6, gate);
230                 runlock(ifc);
231                 poperror();
232                 return 0;
233         }
234
235         if(gating) 
236         if(ifc->reassemble <= 0) {
237
238                 /* v6 intermediate nodes are not supposed to fragment pkts;
239                    we fragment if ifc->reassemble is turned on; an exception
240                    needed for nat.
241                  */
242
243                 ip->stats[OutDiscards]++;
244                 icmppkttoobig6(f, ifc, bp);
245                 netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
246                 goto raise;
247         }
248                 
249         /* start v6 fragmentation */
250         uflen = unfraglen(bp, &nexthdr, 1);
251         if(uflen > medialen) {
252                 ip->stats[FragFails]++;
253                 ip->stats[OutDiscards]++;
254                 netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
255                 goto raise;
256         }
257
258         flen = len - uflen;
259         seglen = (medialen - (uflen + IP6FHDR)) & ~7;
260         if(seglen < 8) {
261                 ip->stats[FragFails]++;
262                 ip->stats[OutDiscards]++;
263                 netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
264                 goto raise;
265         }
266
267         lid = incref(&ip->id6);
268         fraghdr.nexthdr = nexthdr;
269         fraghdr.res = 0;
270         hnputl(fraghdr.id, lid);
271
272         xp = bp;
273         offset = uflen;
274         while (xp != nil && offset && offset >= BLEN(xp)) {
275                 offset -= BLEN(xp);
276                 xp = xp->next;
277         }
278         xp->rp += offset;
279
280         fragoff = 0; 
281         morefrags = 1;
282
283         for(; fragoff < flen; fragoff += seglen) {
284                 nb = allocb(uflen + IP6FHDR + seglen);
285
286                 if(fragoff + seglen >= flen) {
287                         seglen = flen - fragoff;
288                         morefrags = 0;
289                 }
290
291                 hnputs(eh->ploadlen, seglen+IP6FHDR);
292                 memmove(nb->wp, eh, uflen);
293                 nb->wp += uflen;
294
295                 hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
296                 fraghdr.offsetRM[1] |= morefrags;
297                 memmove(nb->wp, &fraghdr, IP6FHDR);
298                 nb->wp += IP6FHDR;
299
300                 /* Copy data */
301                 chunk = seglen;
302                 while (chunk) {
303                         if(!xp) {
304                                 ip->stats[OutDiscards]++;
305                                 ip->stats[FragFails]++;
306                                 freeblist(nb);
307                                 netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
308                                 goto raise;
309                         }
310                         blklen = chunk;
311                         if(BLEN(xp) < chunk)
312                                 blklen = BLEN(xp);
313                         memmove(nb->wp, xp->rp, blklen);
314
315                         nb->wp += blklen;
316                         xp->rp += blklen;
317                         chunk -= blklen;
318                         if(xp->rp == xp->wp)
319                                 xp = xp->next; 
320                 }
321
322                 ifc->m->bwrite(ifc, nb, V6, gate);
323                 ip->stats[FragCreates]++;
324         }
325         ip->stats[FragOKs]++;
326
327 raise:
328         runlock(ifc);
329         poperror();
330 free:
331         freeblist(bp);  
332         return rv;
333 }
334
335 void
336 ipiput6(Fs *f, Ipifc *ifc, Block *bp)
337 {
338         int hl;
339         int hop, tos;
340         uchar proto;
341         Ip6hdr *h;
342         Proto *p;
343         int notforme;
344         int tentative;
345         uchar v6dst[IPaddrlen];
346         IP *ip;
347         Route *r, *sr;
348
349         ip = f->ip;
350         ip->stats[InReceives]++;
351
352         /*
353          *  Ensure we have all the header info in the first
354          *  block.  Make life easier for other protocols by
355          *  collecting up to the first 64 bytes in the first block.
356          */
357         if(BLEN(bp) < 64) {
358                 hl = blocklen(bp);
359                 if(hl < IP6HDR)
360                         hl = IP6HDR;
361                 if(hl > 64)
362                         hl = 64;
363                 bp = pullupblock(bp, hl);
364                 if(bp == nil)
365                         return;
366         }
367
368         h = (Ip6hdr *)(bp->rp);
369
370         memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
371         notforme = ipforme(f, v6dst) == 0;
372         tentative = iptentative(f, v6dst);
373   
374         if(tentative && (h->proto != ICMPv6)) {
375                 print("tentative addr, drop\n");
376                 freeblist(bp);
377                 return;
378         }
379
380         /* Check header version */
381         if(BLKIPVER(bp) != IP_VER6) {
382                 ip->stats[InHdrErrors]++;
383                 netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
384                 freeblist(bp);
385                 return;
386         }
387
388         /* route */
389         if(notforme) {
390                 if(!ip->iprouting){
391                         freeb(bp);
392                         return;
393                 }
394                 /* don't forward to source's network */
395                 sr = v6lookup(f, h->src, nil);
396                 r = v6lookup(f, h->dst, nil);
397
398                 if(r == nil || sr == r){
399                         ip->stats[OutDiscards]++;
400                         freeblist(bp);
401                         return;
402                 }
403
404                 /* don't forward if packet has timed out */
405                 hop = h->ttl;
406                 if(hop < 1) {
407                         ip->stats[InHdrErrors]++;
408                         icmpttlexceeded6(f, ifc, bp);
409                         freeblist(bp);
410                         return;
411                 }
412
413                 /* process headers & reassemble if the interface expects it */
414                 bp = procxtns(ip, bp, r->ifc->reassemble);
415
416                 if(bp == nil)
417                         return;
418
419                 ip->stats[ForwDatagrams]++;
420                 h = (Ip6hdr *) (bp->rp);
421                 tos = IPV6CLASS(h);
422                 hop = h->ttl;
423                 ipoput6(f, bp, 1, hop-1, tos, nil);
424                 return;
425         }
426
427         /* reassemble & process headers if needed */
428         bp = procxtns(ip, bp, 1);
429
430         if(bp == nil)
431                 return;
432
433         h = (Ip6hdr *) (bp->rp);
434         proto = h->proto;
435         p = Fsrcvpcol(f, proto);
436         if(p != nil && p->rcv != nil) {
437                 ip->stats[InDelivers]++;
438                 (*p->rcv)(p, ifc, bp);
439                 return;
440         }
441
442         ip->stats[InDiscards]++;
443         ip->stats[InUnknownProtos]++;
444         freeblist(bp);
445 }
446
447 /*
448  * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
449  */
450 void
451 ipfragfree6(IP *ip, Fragment6 *frag)
452 {
453         Fragment6 *fl, **l;
454
455         if(frag->blist)
456                 freeblist(frag->blist);
457
458         memset(frag->src, 0, IPaddrlen);
459         frag->id = 0;
460         frag->blist = nil;
461
462         l = &ip->flisthead6;
463         for(fl = *l; fl; fl = fl->next) {
464                 if(fl == frag) {
465                         *l = frag->next;
466                         break;
467                 }
468                 l = &fl->next;
469         }
470
471         frag->next = ip->fragfree6;
472         ip->fragfree6 = frag;
473
474 }
475
476 /*
477  * ipfragallo6 - copied from ipfragalloc4
478  */
479 Fragment6*
480 ipfragallo6(IP *ip)
481 {
482         Fragment6 *f;
483
484         while(ip->fragfree6 == nil) {
485                 /* free last entry on fraglist */
486                 for(f = ip->flisthead6; f->next; f = f->next)
487                         ;
488                 ipfragfree6(ip, f);
489         }
490         f = ip->fragfree6;
491         ip->fragfree6 = f->next;
492         f->next = ip->flisthead6;
493         ip->flisthead6 = f;
494         f->age = NOW + 30000;
495
496         return f;
497 }
498
499 static Block*
500 procxtns(IP *ip, Block *bp, int doreasm) {
501
502         int offset;
503         uchar proto;
504         Ip6hdr *h;
505
506         h = (Ip6hdr *) (bp->rp);
507         offset = unfraglen(bp, &proto, 0);
508
509         if((proto == FH) && (doreasm != 0)) {
510                 bp = ip6reassemble(ip, offset, bp, h);
511                 if(bp == nil) 
512                         return nil; 
513                 offset = unfraglen(bp, &proto, 0);
514         }
515
516         if(proto == DOH || offset > IP6HDR) 
517                 bp = procopts(bp);
518
519         return bp;
520 }
521
522
523 /*      returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
524  *      hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
525  *      of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
526  *      field of the last header in the "Unfragmentable part" is set to FH.
527  */
528 int
529 unfraglen(Block *bp, uchar *nexthdr, int setfh)
530 {
531         uchar *p, *q;
532         int ufl, hs;
533
534         p = bp->rp;
535         q = p+6;        /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
536         *nexthdr = *q;
537         ufl = IP6HDR;
538         p += ufl;
539
540         for(;;) {
541                 if(*nexthdr == HBH || *nexthdr == RH) {
542                         *nexthdr = *p;
543                         hs = ((int)*(p+1) + 1) * 8;
544                         ufl += hs;
545                         q = p;
546                         p += hs;
547                 }
548                 else
549                         break;
550         }
551
552         if(*nexthdr == FH)
553                 *q = *p;
554
555         if(setfh)
556                 *q = FH;
557
558         return ufl;
559 }
560
561 Block*
562 procopts(Block *bp)
563 {
564         return bp;
565 }
566
567 Block*
568 ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
569 {
570
571         int fend, offset;
572         uint id;
573         Fragment6 *f, *fnext;
574         Fraghdr6 *fraghdr;
575         uchar src[IPaddrlen], dst[IPaddrlen];
576         Block *bl, **l, *last, *prev;
577         int ovlap, len, fragsize, pktposn;
578
579         fraghdr = (Fraghdr6 *) (bp->rp + uflen);
580         memmove(src, ih->src, IPaddrlen);
581         memmove(dst, ih->dst, IPaddrlen);
582         id = nhgetl(fraghdr->id);
583         offset = nhgets(fraghdr->offsetRM) & ~7;
584
585         /*
586          *  block lists are too hard, pullupblock into a single block
587          */
588         if(bp->next){
589                 bp = pullupblock(bp, blocklen(bp));
590                 ih = (Ip6hdr *)(bp->rp);
591         }
592
593
594         qlock(&ip->fraglock6);
595
596         /*
597          *  find a reassembly queue for this fragment
598          */
599         for(f = ip->flisthead6; f; f = fnext){
600                 fnext = f->next;
601                 if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
602                         break;
603                 if(f->age < NOW){
604                         ip->stats[ReasmTimeout]++;
605                         ipfragfree6(ip, f);
606                 }
607         }
608
609
610         /*
611          *  if this isn't a fragmented packet, accept it
612          *  and get rid of any fragments that might go
613          *  with it.
614          */
615         if(nhgets(fraghdr->offsetRM)==0) {      // first frag is also the last
616                 if(f != nil) {
617                         ipfragfree6(ip, f);
618                         ip->stats[ReasmFails]++;
619                 }
620                 qunlock(&ip->fraglock6);
621                 return bp;
622         }
623
624         if(bp->base+sizeof(Ipfrag) >= bp->rp){
625                 bp = padblock(bp, sizeof(Ipfrag));
626                 bp->rp += sizeof(Ipfrag);
627         }
628
629         BKFG(bp)->foff = offset;
630         BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
631
632         /* First fragment allocates a reassembly queue */
633         if(f == nil) {
634                 f = ipfragallo6(ip);
635                 f->id = id;
636                 memmove(f->src, src, IPaddrlen);
637                 memmove(f->dst, dst, IPaddrlen);
638
639                 f->blist = bp;
640
641                 qunlock(&ip->fraglock6);
642                 ip->stats[ReasmReqds]++;
643                 return nil;
644         }
645
646         /*
647          *  find the new fragment's position in the queue
648          */
649         prev = nil;
650         l = &f->blist;
651         bl = f->blist;
652         while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
653                 prev = bl;
654                 l = &bl->next;
655                 bl = bl->next;
656         }
657
658         /* Check overlap of a previous fragment - trim away as necessary */
659         if(prev) {
660                 ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
661                 if(ovlap > 0) {
662                         if(ovlap >= BKFG(bp)->flen) {
663                                 freeblist(bp);
664                                 qunlock(&ip->fraglock6);
665                                 return nil;
666                         }
667                         BKFG(prev)->flen -= ovlap;
668                 }
669         }
670
671         /* Link onto assembly queue */
672         bp->next = *l;
673         *l = bp;
674
675         /* Check to see if succeeding segments overlap */
676         if(bp->next) {
677                 l = &bp->next;
678                 fend = BKFG(bp)->foff + BKFG(bp)->flen;
679
680                 /* Take completely covered segments out */
681
682                 while(*l) {
683                         ovlap = fend - BKFG(*l)->foff;
684
685                         if(ovlap <= 0) 
686                                 break; 
687                         if(ovlap < BKFG(*l)->flen) {
688                                 BKFG(*l)->flen -= ovlap;
689                                 BKFG(*l)->foff += ovlap;
690                                 /* move up ih hdrs */
691                                 memmove((*l)->rp + ovlap, (*l)->rp, uflen);
692                                 (*l)->rp += ovlap;
693                                 break;
694                         }
695                         last = (*l)->next;
696                         (*l)->next = nil;
697                         freeblist(*l);
698                         *l = last;
699                 }
700         }
701
702         /*
703          *  look for a complete packet.  if we get to a fragment
704          *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
705          */
706         pktposn = 0;
707         for(bl = f->blist; bl; bl = bl->next) {
708                 if(BKFG(bl)->foff != pktposn)
709                         break;
710         
711                 fraghdr = (Fraghdr6 *) (bl->rp + uflen);
712                 if((fraghdr->offsetRM[1] & 1) == 0) {
713
714                         bl = f->blist;
715
716                         /* get rid of frag header in first fragment */
717
718                         memmove(bl->rp + IP6FHDR, bl->rp, uflen);
719                         bl->rp += IP6FHDR;
720                         len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
721                         bl->wp = bl->rp + len + IP6HDR;
722
723                         /* Pullup all the fragment headers and
724                          * return a complete packet
725                          */
726                         for(bl = bl->next; bl; bl = bl->next) {
727                                 fragsize = BKFG(bl)->flen;
728                                 len += fragsize;
729                                 bl->rp += uflen + IP6FHDR;
730                                 bl->wp = bl->rp + fragsize;
731                         }
732
733                         bl = f->blist;
734                         f->blist = nil;
735                         ipfragfree6(ip, f);
736                         ih = (Ip6hdr*)(bl->rp);
737                         hnputs(ih->ploadlen, len);
738                         qunlock(&ip->fraglock6);
739                         ip->stats[ReasmOKs]++;
740                         return bl;              
741                 }
742                 pktposn += BKFG(bl)->flen;
743         }
744         qunlock(&ip->fraglock6);
745         return nil;
746 }
747