Fixes kref in iproute
[akaros.git] / kern / src / net / ipv6.c
1 // INFERNO
2 #include <vfs.h>
3 #include <kfs.h>
4 #include <slab.h>
5 #include <kmalloc.h>
6 #include <kref.h>
7 #include <string.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include <error.h>
11 #include <cpio.h>
12 #include <pmap.h>
13 #include <smp.h>
14 #include <ip.h>
15
16 #include <vfs.h>
17 #include <kfs.h>
18 #include <slab.h>
19 #include <kmalloc.h>
20 #include <kref.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <assert.h>
24 #include <error.h>
25 #include <cpio.h>
26 #include <pmap.h>
27 #include <smp.h>
28 #include <ip.h>
29
/* Header sizes, header flag bits and the packet size limit used in this file. */
enum {
	IP4HDR = 20,		/* sizeof(Ip4hdr) */
	IP6HDR = 40,		/* sizeof(Ip6hdr) */
	IP_HLEN4 = 0x05,	/* Header length in words */
	IP_DF = 0x4000,		/* Don't fragment */
	IP_MF = 0x2000,		/* More fragments */
	IP6FHDR = 8,		/* sizeof(Fraghdr6) */
	IP_MAX = 32 * 1024,	/* Maximum Internet packet size */
};
40
/*
 * IPV6CLASS: the 8-bit traffic class of the IPv6 header that hdr points to.
 * The class straddles two bytes: its high nibble is the low nibble of vcf[0]
 * and its low nibble is the high nibble of vcf[1].  This is the inverse of
 * the encoding ipoput6() applies (vcf[0] |= tos >> 4; vcf[1] = tos << 4).
 */
#define IPV6CLASS(hdr) \
	((((hdr)->vcf[0] & 0x0F) << 4) | (((hdr)->vcf[1] & 0xF0) >> 4))
/*
 * BLKIPVER: the version nibble of the header at xp->rp, left in the high
 * four bits of the byte (i.e. not shifted down).
 */
#define BLKIPVER(xp)	(((struct ip6hdr *)((xp)->rp))->vcf[0] & 0xF0)
/*
 * This sleazy macro is stolen shamelessly from ip.c, see comment there.
 * It views the start of the block's buffer (xp->base) as a struct Ipfrag.
 */
#define BKFG(xp)	((struct Ipfrag *)((xp)->base))
struct fragment6;

/* Forward declarations of the helpers defined further down in this file. */
struct block *ip6reassemble(struct IP *, int unused_int, struct block *,
			    struct ip6hdr *);
void ipfragfree6(struct IP *, struct fragment6 *);
struct fragment6 *ipfragallo6(struct IP *);
static struct block *procxtns(struct IP *ip, struct block *bp, int doreasm);
int unfraglen(struct block *bp, uint8_t *nexthdr, int setfh);
struct block *procopts(struct block *bp);
56
/* MIB II counters; each value is an index into the stats[] array of struct IP */
enum {
	Forwarding = 0,
	DefaultTTL,
	InReceives,
	InHdrErrors,
	InAddrErrors,
	ForwDatagrams,
	InUnknownProtos,
	InDiscards,
	InDelivers,
	OutRequests,
	OutDiscards,
	OutNoRoutes,
	ReasmTimeout,
	ReasmReqds,
	ReasmOKs,
	ReasmFails,
	FragOKs,
	FragFails,
	FragCreates,

	Nstats,		/* number of counters; keep last */
};
82
83 static char *statnames[] =
84 {
85 [Forwarding]    "Forwarding",
86 [DefaultTTL]    "DefaultTTL",
87 [InReceives]    "InReceives",
88 [InHdrErrors]   "InHdrErrors",
89 [InAddrErrors]  "InAddrErrors",
90 [ForwDatagrams] "ForwDatagrams",
91 [InUnknownProtos]       "InUnknownProtos",
92 [InDiscards]    "InDiscards",
93 [InDelivers]    "InDelivers",
94 [OutRequests]   "OutRequests",
95 [OutDiscards]   "OutDiscards",
96 [OutNoRoutes]   "OutNoRoutes",
97 [ReasmTimeout]  "ReasmTimeout",
98 [ReasmReqds]    "ReasmReqds",
99 [ReasmOKs]      "ReasmOKs",
100 [ReasmFails]    "ReasmFails",
101 [FragOKs]       "FragOKs",
102 [FragFails]     "FragFails",
103 [FragCreates]   "FragCreates",
104 };
105
/*
 * Entry of an IPv4 reassembly list.  The tag is lower-case "fragment4" so
 * that it is the type the 'next' member below and the flisthead4/fragfree4
 * pointers in struct IP actually refer to (previously the tag was spelled
 * "Fragment4", leaving "struct fragment4" an unrelated incomplete type).
 * NOTE(review): src/dst/id presumably identify the datagram being rebuilt,
 * mirroring struct fragment6 - confirm against ip.c.
 */
struct fragment4
{
	struct block *blist;		/* blocks queued on this entry */
	struct fragment4 *next;		/* next entry on the same list */
	uint32_t src;
	uint32_t dst;
	uint16_t id;
	uint32_t age;
};
115
116 struct fragment6
117 {
118         struct block*   blist;
119         struct fragment6*       next;
120         uint8_t         src[IPaddrlen];
121         uint8_t         dst[IPaddrlen];
122         unsigned int    id;
123         uint32_t        age;
124 };
125
/*
 * Per-fragment bookkeeping that BKFG() overlays on the start of a block's
 * buffer while the block sits on a reassembly queue.
 */
struct Ipfrag {
	uint16_t foff;		/* offset of this fragment's data in the packet */
	uint16_t flen;		/* number of data bytes this fragment contributes */
};
131
132 /* an instance of IP */
133 struct IP
134 {
135         uint32_t                stats[Nstats];
136
137         qlock_t         fraglock4;
138         struct fragment4*       flisthead4;
139         struct fragment4*       fragfree4;
140         struct kref             id4;
141
142         qlock_t         fraglock6;
143         struct fragment6*       flisthead6;
144         struct fragment6*       fragfree6;
145         struct kref             id6;
146
147         int             iprouting;      /* true if we route like a gateway */
148 };
149
150 int
151 ipoput6(struct Fs *f,
152         struct block *bp, int gating, int ttl, int tos, struct conv *c)
153 {
154         ERRSTACK(2);
155         int tentative;
156         struct Ipifc *ifc;
157         uint8_t *gate, nexthdr;
158         struct ip6hdr *eh;
159         int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
160         struct route *r, *sr;
161         struct fraghdr6 fraghdr;
162         struct block *xp, *nb;
163         struct IP *ip;
164         int rv = 0;
165
166         ip = f->ip;
167
168         /* Fill out the ip header */
169         eh = (struct ip6hdr*)(bp->rp);
170
171         ip->stats[OutRequests]++;
172
173         /* Number of uint8_ts in data and ip header to write */
174         len = blocklen(bp);
175         
176         tentative = iptentative(f, eh->src);
177         if(tentative){
178                 netlog(f, Logip, "reject tx of packet with tentative src address\n");
179                 goto free;
180         }
181
182         if(gating){
183                 chunk = nhgets(eh->ploadlen);
184                 if(chunk > len){
185                         ip->stats[OutDiscards]++;
186                         netlog(f, Logip, "short gated packet\n");
187                         goto free;
188                 }
189                 if(chunk + IPV6HDR_LEN < len)
190                         len = chunk + IPV6HDR_LEN;
191         }
192
193         if(len >= IP_MAX){
194 //              print("len > IP_MAX, free\n");
195                 ip->stats[OutDiscards]++;
196                 netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
197                 goto free;
198         }
199
200         r = v6lookup(f, eh->dst, c);
201         if(r == NULL){
202 //              print("no route for %I, src %I free\n", eh->dst, eh->src);
203                 ip->stats[OutNoRoutes]++;
204                 netlog(f, Logip, "no interface %I\n", eh->dst);
205                 rv = -1;
206                 goto free;
207         }
208
209         ifc = r->rt.ifc;
210         if(r->rt.type & (Rifc|Runi))
211                 gate = eh->dst;
212         else
213         if(r->rt.type & (Rbcast|Rmulti)) {
214                 gate = eh->dst;
215                 sr = v6lookup(f, eh->src, NULL);
216                 if(sr != NULL && (sr->rt.type & Runi))
217                         ifc = sr->rt.ifc;
218         }
219         else
220                 gate = r->v6.gate;
221
222         if(!gating)
223                 eh->vcf[0] = IP_VER6;
224         eh->ttl = ttl;
225         if(!gating) {
226                 eh->vcf[0] |= (tos >> 4);
227                 eh->vcf[1] = (tos << 4);
228         }
229
230         if(!canrlock(&ifc->rwlock)) {
231                 goto free;
232         }
233
234         if(waserror()){
235                 runlock(&ifc->rwlock);
236                 nexterror();
237         }
238
239         if(ifc->m == NULL) {
240                 goto raise;
241         }
242
243         /* If we dont need to fragment just send it */
244         medialen = ifc->maxtu - ifc->m->hsize;
245         if(len <= medialen) {
246                 hnputs(eh->ploadlen, len-IPV6HDR_LEN);
247                 ifc->m->bwrite(ifc, bp, V6, gate);
248                 runlock(&ifc->rwlock);
249                 poperror();
250                 return 0;
251         }
252
253         if(gating) 
254         if(ifc->reassemble <= 0) {
255
256                 /* v6 intermediate nodes are not supposed to fragment pkts;
257                    we fragment if ifc->reassemble is turned on; an exception
258                    needed for nat.
259                  */
260
261                 ip->stats[OutDiscards]++;
262                 icmppkttoobig6(f, ifc, bp);
263                 netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
264                 goto raise;
265         }
266                 
267         /* start v6 fragmentation */
268         uflen = unfraglen(bp, &nexthdr, 1);
269         if(uflen > medialen) {
270                 ip->stats[FragFails]++;
271                 ip->stats[OutDiscards]++;
272                 netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
273                 goto raise;
274         }
275
276         flen = len - uflen;
277         seglen = (medialen - (uflen + IP6FHDR)) & ~7;
278         if(seglen < 8) {
279                 ip->stats[FragFails]++;
280                 ip->stats[OutDiscards]++;
281                 netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
282                 goto raise;
283         }
284
285         lid = kref_next(&ip->id6);
286         fraghdr.nexthdr = nexthdr;
287         fraghdr.res = 0;
288         hnputl(fraghdr.id, lid);
289
290         xp = bp;
291         offset = uflen;
292         while (xp != NULL && offset && offset >= BLEN(xp)) {
293                 offset -= BLEN(xp);
294                 xp = xp->next;
295         }
296         xp->rp += offset;
297
298         fragoff = 0; 
299         morefrags = 1;
300
301         for(; fragoff < flen; fragoff += seglen) {
302                 nb = allocb(uflen + IP6FHDR + seglen);
303
304                 if(fragoff + seglen >= flen) {
305                         seglen = flen - fragoff;
306                         morefrags = 0;
307                 }
308
309                 hnputs(eh->ploadlen, seglen+IP6FHDR);
310                 memmove(nb->wp, eh, uflen);
311                 nb->wp += uflen;
312
313                 hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
314                 fraghdr.offsetRM[1] |= morefrags;
315                 memmove(nb->wp, &fraghdr, IP6FHDR);
316                 nb->wp += IP6FHDR;
317
318                 /* Copy data */
319                 chunk = seglen;
320                 while (chunk) {
321                         if(!xp) {
322                                 ip->stats[OutDiscards]++;
323                                 ip->stats[FragFails]++;
324                                 freeblist(nb);
325                                 netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
326                                 goto raise;
327                         }
328                         blklen = chunk;
329                         if(BLEN(xp) < chunk)
330                                 blklen = BLEN(xp);
331                         memmove(nb->wp, xp->rp, blklen);
332
333                         nb->wp += blklen;
334                         xp->rp += blklen;
335                         chunk -= blklen;
336                         if(xp->rp == xp->wp)
337                                 xp = xp->next; 
338                 }
339
340                 ifc->m->bwrite(ifc, nb, V6, gate);
341                 ip->stats[FragCreates]++;
342         }
343         ip->stats[FragOKs]++;
344
345 raise:
346         runlock(&ifc->rwlock);
347         poperror();
348 free:
349         freeblist(bp);  
350         return rv;
351 }
352
353 void
354 ipiput6(struct Fs *f, struct Ipifc *ifc, struct block *bp)
355 {
356         int hl;
357         int hop, tos;
358         uint8_t proto;
359         struct ip6hdr *h;
360         struct Proto *p;
361         int notforme;
362         int tentative;
363         uint8_t v6dst[IPaddrlen];
364         struct IP *ip;
365         struct route *r, *sr;
366
367         ip = f->ip;
368         ip->stats[InReceives]++;
369
370         /*
371          *  Ensure we have all the header info in the first
372          *  block.  Make life easier for other protocols by
373          *  collecting up to the first 64 bytes in the first block.
374          */
375         if(BLEN(bp) < 64) {
376                 hl = blocklen(bp);
377                 if(hl < IP6HDR)
378                         hl = IP6HDR;
379                 if(hl > 64)
380                         hl = 64;
381                 bp = pullupblock(bp, hl);
382                 if(bp == NULL)
383                         return;
384         }
385
386         h = (struct ip6hdr *)(bp->rp);
387
388         memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
389         notforme = ipforme(f, v6dst) == 0;
390         tentative = iptentative(f, v6dst);
391   
392         if(tentative && (h->proto != ICMPv6)) {
393                 printd("tentative addr, drop\n");
394                 freeblist(bp);
395                 return;
396         }
397
398         /* Check header version */
399         if(BLKIPVER(bp) != IP_VER6) {
400                 ip->stats[InHdrErrors]++;
401                 netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
402                 freeblist(bp);
403                 return;
404         }
405
406         /* route */
407         if(notforme) {
408                 if(!ip->iprouting){
409                         freeb(bp);
410                         return;
411                 }
412                 /* don't forward to source's network */
413                 sr = v6lookup(f, h->src, NULL);
414                 r = v6lookup(f, h->dst, NULL);
415
416                 if(r == NULL || sr == r){
417                         ip->stats[OutDiscards]++;
418                         freeblist(bp);
419                         return;
420                 }
421
422                 /* don't forward if packet has timed out */
423                 hop = h->ttl;
424                 if(hop < 1) {
425                         ip->stats[InHdrErrors]++;
426                         icmpttlexceeded6(f, ifc, bp);
427                         freeblist(bp);
428                         return;
429                 }
430
431                 /* process headers & reassemble if the interface expects it */
432                 bp = procxtns(ip, bp, r->rt.ifc->reassemble);
433
434                 if(bp == NULL)
435                         return;
436
437                 ip->stats[ForwDatagrams]++;
438                 h = (struct ip6hdr *) (bp->rp);
439                 tos = IPV6CLASS(h);
440                 hop = h->ttl;
441                 ipoput6(f, bp, 1, hop-1, tos, NULL);
442                 return;
443         }
444
445         /* reassemble & process headers if needed */
446         bp = procxtns(ip, bp, 1);
447
448         if(bp == NULL)
449                 return;
450
451         h = (struct ip6hdr *) (bp->rp);
452         proto = h->proto;
453         p = Fsrcvpcol(f, proto);
454         if(p != NULL && p->rcv != NULL) {
455                 ip->stats[InDelivers]++;
456                 (*p->rcv)(p, ifc, bp);
457                 return;
458         }
459
460         ip->stats[InDiscards]++;
461         ip->stats[InUnknownProtos]++;
462         freeblist(bp);
463 }
464
465 /*
466  * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
467  */
468 void
469 ipfragfree6(struct IP *ip, struct fragment6 *frag)
470 {
471         struct fragment6 *fl, **l;
472
473         if(frag->blist)
474                 freeblist(frag->blist);
475
476         memset(frag->src, 0, IPaddrlen);
477         frag->id = 0;
478         frag->blist = NULL;
479
480         l = &ip->flisthead6;
481         for(fl = *l; fl; fl = fl->next) {
482                 if(fl == frag) {
483                         *l = frag->next;
484                         break;
485                 }
486                 l = &fl->next;
487         }
488
489         frag->next = ip->fragfree6;
490         ip->fragfree6 = frag;
491
492 }
493
494 /*
495  * ipfragallo6 - copied from ipfragalloc4
496  */
497 struct fragment6*
498 ipfragallo6(struct IP *ip)
499 {
500         struct fragment6 *f;
501
502         while(ip->fragfree6 == NULL) {
503                 /* free last entry on fraglist */
504                 for(f = ip->flisthead6; f->next; f = f->next)
505                         ;
506                 ipfragfree6(ip, f);
507         }
508         f = ip->fragfree6;
509         ip->fragfree6 = f->next;
510         f->next = ip->flisthead6;
511         ip->flisthead6 = f;
512         f->age = NOW + 30000;
513
514         return f;
515 }
516
517 static struct block*
518 procxtns(struct IP *ip, struct block *bp, int doreasm) {
519
520         int offset;
521         uint8_t proto;
522         struct ip6hdr *h;
523
524         h = (struct ip6hdr *) (bp->rp);
525         offset = unfraglen(bp, &proto, 0);
526
527         if((proto == FH) && (doreasm != 0)) {
528                 bp = ip6reassemble(ip, offset, bp, h);
529                 if(bp == NULL) 
530                         return NULL; 
531                 offset = unfraglen(bp, &proto, 0);
532         }
533
534         if(proto == DOH || offset > IP6HDR) 
535                 bp = procopts(bp);
536
537         return bp;
538 }
539
540
541 /*      returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
542  *      hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
543  *      of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
544  *      field of the last header in the "Unfragmentable part" is set to FH.
545  */
546 int
547 unfraglen(struct block *bp, uint8_t *nexthdr, int setfh)
548 {
549         uint8_t *p, *q;
550         int ufl, hs;
551
552         p = bp->rp;
553         q = p+6;        /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
554         *nexthdr = *q;
555         ufl = IP6HDR;
556         p += ufl;
557
558         for(;;) {
559                 if(*nexthdr == HBH || *nexthdr == RH) {
560                         *nexthdr = *p;
561                         hs = ((int)*(p+1) + 1) * 8;
562                         ufl += hs;
563                         q = p;
564                         p += hs;
565                 }
566                 else
567                         break;
568         }
569
570         if(*nexthdr == FH)
571                 *q = *p;
572
573         if(setfh)
574                 *q = FH;
575
576         return ufl;
577 }
578
/* procopts - placeholder: returns bp unchanged. */
struct block *procopts(struct block *bp)
{
	return bp;
}
584
585 struct block*
586 ip6reassemble(struct IP* ip, int uflen, struct block* bp, struct ip6hdr* ih)
587 {
588
589         int fend, offset;
590         unsigned int id;
591         struct fragment6 *f, *fnext;
592         struct fraghdr6 *fraghdr;
593         uint8_t src[IPaddrlen], dst[IPaddrlen];
594         struct block *bl, **l, *last, *prev;
595         int ovlap, len, fragsize, pktposn;
596
597         fraghdr = (struct fraghdr6 *) (bp->rp + uflen);
598         memmove(src, ih->src, IPaddrlen);
599         memmove(dst, ih->dst, IPaddrlen);
600         id = nhgetl(fraghdr->id);
601         offset = nhgets(fraghdr->offsetRM) & ~7;
602
603         /*
604          *  block lists are too hard, pullupblock into a single block
605          */
606         if(bp->next){
607                 bp = pullupblock(bp, blocklen(bp));
608                 ih = (struct ip6hdr *)(bp->rp);
609         }
610
611
612         qlock(&ip->fraglock6);
613
614         /*
615          *  find a reassembly queue for this fragment
616          */
617         for(f = ip->flisthead6; f; f = fnext){
618                 fnext = f->next;
619                 if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
620                         break;
621                 if(f->age < NOW){
622                         ip->stats[ReasmTimeout]++;
623                         ipfragfree6(ip, f);
624                 }
625         }
626
627
628         /*
629          *  if this isn't a fragmented packet, accept it
630          *  and get rid of any fragments that might go
631          *  with it.
632          */
633         if(nhgets(fraghdr->offsetRM)==0) {      // first frag is also the last
634                 if(f != NULL) {
635                         ipfragfree6(ip, f);
636                         ip->stats[ReasmFails]++;
637                 }
638                 qunlock(&ip->fraglock6);
639                 return bp;
640         }
641
642         if(bp->base+sizeof(struct Ipfrag) >= bp->rp){
643                 bp = padblock(bp, sizeof(struct Ipfrag));
644                 bp->rp += sizeof(struct Ipfrag);
645         }
646
647         BKFG(bp)->foff = offset;
648         BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
649
650         /* First fragment allocates a reassembly queue */
651         if(f == NULL) {
652                 f = ipfragallo6(ip);
653                 f->id = id;
654                 memmove(f->src, src, IPaddrlen);
655                 memmove(f->dst, dst, IPaddrlen);
656
657                 f->blist = bp;
658
659                 qunlock(&ip->fraglock6);
660                 ip->stats[ReasmReqds]++;
661                 return NULL;
662         }
663
664         /*
665          *  find the new fragment's position in the queue
666          */
667         prev = NULL;
668         l = &f->blist;
669         bl = f->blist;
670         while(bl != NULL && BKFG(bp)->foff > BKFG(bl)->foff) {
671                 prev = bl;
672                 l = &bl->next;
673                 bl = bl->next;
674         }
675
676         /* Check overlap of a previous fragment - trim away as necessary */
677         if(prev) {
678                 ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
679                 if(ovlap > 0) {
680                         if(ovlap >= BKFG(bp)->flen) {
681                                 freeblist(bp);
682                                 qunlock(&ip->fraglock6);
683                                 return NULL;
684                         }
685                         BKFG(prev)->flen -= ovlap;
686                 }
687         }
688
689         /* Link onto assembly queue */
690         bp->next = *l;
691         *l = bp;
692
693         /* Check to see if succeeding segments overlap */
694         if(bp->next) {
695                 l = &bp->next;
696                 fend = BKFG(bp)->foff + BKFG(bp)->flen;
697
698                 /* Take completely covered segments out */
699
700                 while(*l) {
701                         ovlap = fend - BKFG(*l)->foff;
702
703                         if(ovlap <= 0) 
704                                 break; 
705                         if(ovlap < BKFG(*l)->flen) {
706                                 BKFG(*l)->flen -= ovlap;
707                                 BKFG(*l)->foff += ovlap;
708                                 /* move up ih hdrs */
709                                 memmove((*l)->rp + ovlap, (*l)->rp, uflen);
710                                 (*l)->rp += ovlap;
711                                 break;
712                         }
713                         last = (*l)->next;
714                         (*l)->next = NULL;
715                         freeblist(*l);
716                         *l = last;
717                 }
718         }
719
720         /*
721          *  look for a complete packet.  if we get to a fragment
722          *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
723          */
724         pktposn = 0;
725         for(bl = f->blist; bl; bl = bl->next) {
726                 if(BKFG(bl)->foff != pktposn)
727                         break;
728         
729                 fraghdr = (struct fraghdr6 *) (bl->rp + uflen);
730                 if((fraghdr->offsetRM[1] & 1) == 0) {
731                         bl = f->blist;
732
733                         /* get rid of frag header in first fragment */
734
735                         memmove(bl->rp + IP6FHDR, bl->rp, uflen);
736                         bl->rp += IP6FHDR;
737                         len = nhgets(((struct ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
738                         bl->wp = bl->rp + len + IP6HDR;
739
740                         /* Pullup all the fragment headers and
741                          * return a complete packet
742                          */
743                         for(bl = bl->next; bl; bl = bl->next) {
744                                 fragsize = BKFG(bl)->flen;
745                                 len += fragsize;
746                                 bl->rp += uflen + IP6FHDR;
747                                 bl->wp = bl->rp + fragsize;
748                         }
749
750                         bl = f->blist;
751                         f->blist = NULL;
752                         ipfragfree6(ip, f);
753                         ih = (struct ip6hdr*)(bl->rp);
754                         hnputs(ih->ploadlen, len);
755                         qunlock(&ip->fraglock6);
756                         ip->stats[ReasmOKs]++;
757                         return bl;              
758                 }
759                 pktposn += BKFG(bl)->flen;
760         }
761         qunlock(&ip->fraglock6);
762         return NULL;
763 }
764