BSD sockets UDP uses 'headers' for all packets
[akaros.git] / kern / src / net / udp.c
1 // INFERNO
2 #define DEBUG
3 #include <vfs.h>
4 #include <kfs.h>
5 #include <slab.h>
6 #include <kmalloc.h>
7 #include <kref.h>
8 #include <string.h>
9 #include <stdio.h>
10 #include <assert.h>
11 #include <error.h>
12 #include <cpio.h>
13 #include <pmap.h>
14 #include <smp.h>
15 #include <ip.h>
16
17 #include <vfs.h>
18 #include <kfs.h>
19 #include <slab.h>
20 #include <kmalloc.h>
21 #include <kref.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <assert.h>
25 #include <error.h>
26 #include <cpio.h>
27 #include <pmap.h>
28 #include <smp.h>
29 #include <ip.h>
30
31 #define DPRINT if(0)print
32
/*
 * Sizes and offsets used when building and parsing UDP packets.
 */
enum {
	/* the UDP header proper: sport, dport, length, checksum (2 bytes each) */
	UDP_UDPHDR_SZ = 8,

	/* IPv4: the pseudo header overlays the tail of the 20-byte IP header */
	UDP4_IPHDR_SZ = 20,
	UDP4_PHDR_OFF = 8,	/* checksum starts at the ttl ("Unused") byte */
	UDP4_PHDR_SZ = 12,	/* Unused + proto + plen + src + dst */

	/* IPv6: the pseudo header overlays the whole 40-byte IP header */
	UDP6_IPHDR_SZ = 40,
	UDP6_PHDR_OFF = 0,
	UDP6_PHDR_SZ = 40,

	/* IP protocol number carried in the protocol/next-header byte */
	IP_UDPPROTO = 17,

	/* bytes of user-visible address header when "headers" mode is on */
	UDP_USEAD7 = 52,	/* raddr, laddr, ifc addr, rport, lport */
	UDP_USEAD6 = 36,	/* raddr, laddr, rport, lport */

	/* timing/retry constants; not referenced elsewhere in this file */
	Udprxms = 200,
	Udptickms = 100,
	Udpmaxxmit = 10,
};
51
52 typedef struct Udp4hdr Udp4hdr;
53 struct Udp4hdr {
54         /* ip header */
55         uint8_t vihl;                           /* Version and header length */
56         uint8_t tos;                            /* Type of service */
57         uint8_t length[2];                      /* packet length */
58         uint8_t id[2];                          /* Identification */
59         uint8_t frag[2];                        /* Fragment information */
60         uint8_t Unused;
61         uint8_t udpproto;                       /* Protocol */
62         uint8_t udpplen[2];                     /* Header plus data length */
63         uint8_t udpsrc[IPv4addrlen];    /* Ip source */
64         uint8_t udpdst[IPv4addrlen];    /* Ip destination */
65
66         /* udp header */
67         uint8_t udpsport[2];            /* Source port */
68         uint8_t udpdport[2];            /* Destination port */
69         uint8_t udplen[2];                      /* data length */
70         uint8_t udpcksum[2];            /* Checksum */
71 };
72
73 typedef struct Udp6hdr Udp6hdr;
74 struct Udp6hdr {
75         uint8_t viclfl[4];
76         uint8_t len[2];
77         uint8_t nextheader;
78         uint8_t hoplimit;
79         uint8_t udpsrc[IPaddrlen];
80         uint8_t udpdst[IPaddrlen];
81
82         /* udp header */
83         uint8_t udpsport[2];            /* Source port */
84         uint8_t udpdport[2];            /* Destination port */
85         uint8_t udplen[2];                      /* data length */
86         uint8_t udpcksum[2];            /* Checksum */
87 };
88
/*
 * MIB II counters, reported by udpstats().
 */
typedef struct Udpstats {
	uint32_t udpInDatagrams;	/* bumped for every packet entering udpiput */
	uint32_t udpNoPorts;		/* no conversation matched the packet */
	uint32_t udpInErrors;		/* dropped for a bad checksum */
	uint32_t udpOutDatagrams;	/* bumped at the end of udpkick */
} Udpstats;
97
98 typedef struct Udppriv Udppriv;
99 struct Udppriv {
100         struct Ipht ht;
101
102         /* MIB counters */
103         Udpstats ustats;
104
105         /* non-MIB stats */
106         uint32_t csumerr;                       /* checksum errors */
107         uint32_t lenerr;                        /* short packet */
108 };
109
110 void (*etherprofiler) (char *name, int qlen);
111 void udpkick(void *x, struct block *bp);
112
113 /*
114  *  protocol specific part of Conv
115  */
116 typedef struct Udpcb Udpcb;
117 struct Udpcb {
118         qlock_t qlock;
119         uint8_t headers;
120 };
121
122 static char *udpconnect(struct conv *c, char **argv, int argc)
123 {
124         char *e;
125         Udppriv *upriv;
126
127         upriv = c->p->priv;
128         e = Fsstdconnect(c, argv, argc);
129         Fsconnected(c, e);
130         if (e != NULL)
131                 return e;
132
133         iphtadd(&upriv->ht, c);
134         return NULL;
135 }
136
137 static int udpstate(struct conv *c, char *state, int n)
138 {
139         return snprintf(state, n, "%s qin %d qout %d",
140                                         c->inuse ? "Open" : "Closed",
141                                         c->rq ? qlen(c->rq) : 0, c->wq ? qlen(c->wq) : 0);
142 }
143
144 static char *udpannounce(struct conv *c, char **argv, int argc)
145 {
146         char *e;
147         Udppriv *upriv;
148
149         upriv = c->p->priv;
150         e = Fsstdannounce(c, argv, argc);
151         if (e != NULL)
152                 return e;
153         Fsconnected(c, NULL);
154         iphtadd(&upriv->ht, c);
155
156         return NULL;
157 }
158
159 static void udpcreate(struct conv *c)
160 {
161         c->rq = qopen(64 * 1024, Qmsg, 0, 0);
162         c->wq = qbypass(udpkick, c);
163 }
164
165 static void udpclose(struct conv *c)
166 {
167         Udpcb *ucb;
168         Udppriv *upriv;
169
170         upriv = c->p->priv;
171         iphtrem(&upriv->ht, c);
172
173         c->state = 0;
174         qclose(c->rq);
175         qclose(c->wq);
176         qclose(c->eq);
177         ipmove(c->laddr, IPnoaddr);
178         ipmove(c->raddr, IPnoaddr);
179         c->lport = 0;
180         c->rport = 0;
181
182         ucb = (Udpcb *) c->ptcl;
183         ucb->headers = 0;
184
185         qunlock(&c->qlock);
186 }
187
188 void udpkick(void *x, struct block *bp)
189 {
190         struct conv *c = x;
191         Udp4hdr *uh4;
192         Udp6hdr *uh6;
193         uint16_t rport;
194         uint8_t laddr[IPaddrlen], raddr[IPaddrlen];
195         Udpcb *ucb;
196         int dlen, ptcllen;
197         Udppriv *upriv;
198         struct Fs *f;
199         int version;
200         struct conv *rc;
201
202         upriv = c->p->priv;
203         assert(upriv);
204         f = c->p->f;
205
206         netlog(c->p->f, Logudp, "udp: kick\n");
207         if (bp == NULL)
208                 return;
209
210         ucb = (Udpcb *) c->ptcl;
211         switch (ucb->headers) {
212                 case 7:
213                         /* get user specified addresses */
214                         bp = pullupblock(bp, UDP_USEAD7);
215                         if (bp == NULL)
216                                 return;
217                         ipmove(raddr, bp->rp);
218                         bp->rp += IPaddrlen;
219                         ipmove(laddr, bp->rp);
220                         bp->rp += IPaddrlen;
221                         /* pick interface closest to dest */
222                         if (ipforme(f, laddr) != Runi)
223                                 findlocalip(f, laddr, raddr);
224                         bp->rp += IPaddrlen;    /* Ignore ifc address */
225                         rport = nhgets(bp->rp);
226                         bp->rp += 2 + 2;        /* Ignore local port */
227                         break;
228                 case 6:
229                         /* get user specified addresses */
230                         bp = pullupblock(bp, UDP_USEAD6);
231                         if (bp == NULL)
232                                 return;
233                         ipmove(raddr, bp->rp);
234                         bp->rp += IPaddrlen;
235                         ipmove(laddr, bp->rp);
236                         bp->rp += IPaddrlen;
237                         /* pick interface closest to dest */
238                         if (ipforme(f, laddr) != Runi)
239                                 findlocalip(f, laddr, raddr);
240                         rport = nhgets(bp->rp);
241                         bp->rp += 2 + 2;        /* Ignore local port */
242                         break;
243                 default:
244                         rport = 0;
245                         break;
246         }
247
248         if (ucb->headers) {
249                 if (memcmp(laddr, v4prefix, IPv4off) == 0 ||
250                         ipcmp(laddr, IPnoaddr) == 0)
251                         version = V4;
252                 else
253                         version = V6;
254         } else {
255                 if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
256                          memcmp(c->laddr, v4prefix, IPv4off) == 0)
257                         || ipcmp(c->raddr, IPnoaddr) == 0)
258                         version = V4;
259                 else
260                         version = V6;
261         }
262
263         dlen = blocklen(bp);
264
265         /* fill in pseudo header and compute checksum */
266         switch (version) {
267                 case V4:
268                         bp = padblock(bp, UDP4_IPHDR_SZ + UDP_UDPHDR_SZ);
269                         if (bp == NULL)
270                                 return;
271
272                         uh4 = (Udp4hdr *) (bp->rp);
273                         ptcllen = dlen + UDP_UDPHDR_SZ;
274                         uh4->Unused = 0;
275                         uh4->udpproto = IP_UDPPROTO;
276                         uh4->frag[0] = 0;
277                         uh4->frag[1] = 0;
278                         hnputs(uh4->udpplen, ptcllen);
279                         if (ucb->headers) {
280                                 v6tov4(uh4->udpdst, raddr);
281                                 hnputs(uh4->udpdport, rport);
282                                 v6tov4(uh4->udpsrc, laddr);
283                                 rc = NULL;
284                         } else {
285                                 v6tov4(uh4->udpdst, c->raddr);
286                                 hnputs(uh4->udpdport, c->rport);
287                                 if (ipcmp(c->laddr, IPnoaddr) == 0)
288                                         findlocalip(f, c->laddr, c->raddr);
289                                 v6tov4(uh4->udpsrc, c->laddr);
290                                 rc = c;
291                         }
292                         hnputs(uh4->udpsport, c->lport);
293                         hnputs(uh4->udplen, ptcllen);
294                         uh4->udpcksum[0] = 0;
295                         uh4->udpcksum[1] = 0;
296                         hnputs(uh4->udpcksum,
297                                    ptclcsum(bp, UDP4_PHDR_OFF,
298                                                         dlen + UDP_UDPHDR_SZ + UDP4_PHDR_SZ));
299                         uh4->vihl = IP_VER4;
300                         ipoput4(f, bp, 0, c->ttl, c->tos, rc);
301                         break;
302
303                 case V6:
304                         bp = padblock(bp, UDP6_IPHDR_SZ + UDP_UDPHDR_SZ);
305                         if (bp == NULL)
306                                 return;
307
308                         // using the v6 ip header to create pseudo header 
309                         // first then reset it to the normal ip header
310                         uh6 = (Udp6hdr *) (bp->rp);
311                         memset(uh6, 0, 8);
312                         ptcllen = dlen + UDP_UDPHDR_SZ;
313                         hnputl(uh6->viclfl, ptcllen);
314                         uh6->hoplimit = IP_UDPPROTO;
315                         if (ucb->headers) {
316                                 ipmove(uh6->udpdst, raddr);
317                                 hnputs(uh6->udpdport, rport);
318                                 ipmove(uh6->udpsrc, laddr);
319                                 rc = NULL;
320                         } else {
321                                 ipmove(uh6->udpdst, c->raddr);
322                                 hnputs(uh6->udpdport, c->rport);
323                                 if (ipcmp(c->laddr, IPnoaddr) == 0)
324                                         findlocalip(f, c->laddr, c->raddr);
325                                 ipmove(uh6->udpsrc, c->laddr);
326                                 rc = c;
327                         }
328                         hnputs(uh6->udpsport, c->lport);
329                         hnputs(uh6->udplen, ptcllen);
330                         uh6->udpcksum[0] = 0;
331                         uh6->udpcksum[1] = 0;
332                         hnputs(uh6->udpcksum,
333                                    ptclcsum(bp, UDP6_PHDR_OFF,
334                                                         dlen + UDP_UDPHDR_SZ + UDP6_PHDR_SZ));
335                         memset(uh6, 0, 8);
336                         uh6->viclfl[0] = IP_VER6;
337                         hnputs(uh6->len, ptcllen);
338                         uh6->nextheader = IP_UDPPROTO;
339                         ipoput6(f, bp, 0, c->ttl, c->tos, rc);
340                         break;
341
342                 default:
343                         panic("udpkick: version %d", version);
344         }
345         upriv->ustats.udpOutDatagrams++;
346 }
347
348 void udpiput(struct Proto *udp, struct Ipifc *ifc, struct block *bp)
349 {
350         int len;
351         Udp4hdr *uh4;
352         Udp6hdr *uh6;
353         struct conv *c;
354         Udpcb *ucb;
355         uint8_t raddr[IPaddrlen], laddr[IPaddrlen];
356         uint16_t rport, lport;
357         Udppriv *upriv;
358         struct Fs *f;
359         int version;
360         int ottl, oviclfl, olen;
361         uint8_t *p;
362
363         upriv = udp->priv;
364         f = udp->f;
365         upriv->ustats.udpInDatagrams++;
366
367         uh4 = (Udp4hdr *) (bp->rp);
368         version = ((uh4->vihl & 0xF0) == IP_VER6) ? V6 : V4;
369
370         /*
371          * Put back pseudo header for checksum 
372          * (remember old values for icmpnoconv())
373          */
374         switch (version) {
375                 case V4:
376                         ottl = uh4->Unused;
377                         uh4->Unused = 0;
378                         len = nhgets(uh4->udplen);
379                         olen = nhgets(uh4->udpplen);
380                         hnputs(uh4->udpplen, len);
381
382                         v4tov6(raddr, uh4->udpsrc);
383                         v4tov6(laddr, uh4->udpdst);
384                         lport = nhgets(uh4->udpdport);
385                         rport = nhgets(uh4->udpsport);
386
387                         if (nhgets(uh4->udpcksum)) {
388                                 if (ptclcsum(bp, UDP4_PHDR_OFF, len + UDP4_PHDR_SZ)) {
389                                         upriv->ustats.udpInErrors++;
390                                         netlog(f, Logudp, "udp: checksum error %I\n", raddr);
391                                         printd("udp: checksum error %I\n", raddr);
392                                         freeblist(bp);
393                                         return;
394                                 }
395                         }
396                         uh4->Unused = ottl;
397                         hnputs(uh4->udpplen, olen);
398                         break;
399                 case V6:
400                         uh6 = (Udp6hdr *) (bp->rp);
401                         len = nhgets(uh6->udplen);
402                         oviclfl = nhgetl(uh6->viclfl);
403                         olen = nhgets(uh6->len);
404                         ottl = uh6->hoplimit;
405                         ipmove(raddr, uh6->udpsrc);
406                         ipmove(laddr, uh6->udpdst);
407                         lport = nhgets(uh6->udpdport);
408                         rport = nhgets(uh6->udpsport);
409                         memset(uh6, 0, 8);
410                         hnputl(uh6->viclfl, len);
411                         uh6->hoplimit = IP_UDPPROTO;
412                         if (ptclcsum(bp, UDP6_PHDR_OFF, len + UDP6_PHDR_SZ)) {
413                                 upriv->ustats.udpInErrors++;
414                                 netlog(f, Logudp, "udp: checksum error %I\n", raddr);
415                                 printd("udp: checksum error %I\n", raddr);
416                                 freeblist(bp);
417                                 return;
418                         }
419                         hnputl(uh6->viclfl, oviclfl);
420                         hnputs(uh6->len, olen);
421                         uh6->nextheader = IP_UDPPROTO;
422                         uh6->hoplimit = ottl;
423                         break;
424                 default:
425                         panic("udpiput: version %d", version);
426                         return; /* to avoid a warning */
427         }
428
429         qlock(&udp->qlock);
430
431         c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
432         if (c == NULL) {
433                 /* no converstation found */
434                 upriv->ustats.udpNoPorts++;
435                 qunlock(&udp->qlock);
436                 netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
437                            laddr, lport);
438
439                 switch (version) {
440                         case V4:
441                                 icmpnoconv(f, bp);
442                                 break;
443                         case V6:
444                                 icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
445                                 break;
446                         default:
447                                 panic("udpiput2: version %d", version);
448                 }
449
450                 freeblist(bp);
451                 return;
452         }
453         ucb = (Udpcb *) c->ptcl;
454
455         if (c->state == Announced) {
456                 if (ucb->headers == 0) {
457                         /* create a new conversation */
458                         if (ipforme(f, laddr) != Runi) {
459                                 switch (version) {
460                                         case V4:
461                                                 v4tov6(laddr, ifc->lifc->local);
462                                                 break;
463                                         case V6:
464                                                 ipmove(laddr, ifc->lifc->local);
465                                                 break;
466                                         default:
467                                                 panic("udpiput3: version %d", version);
468                                 }
469                         }
470                         c = Fsnewcall(c, raddr, rport, laddr, lport, version);
471                         if (c == NULL) {
472                                 qunlock(&udp->qlock);
473                                 freeblist(bp);
474                                 return;
475                         }
476                         iphtadd(&upriv->ht, c);
477                         ucb = (Udpcb *) c->ptcl;
478                 }
479         }
480
481         qlock(&c->qlock);
482         qunlock(&udp->qlock);
483
484         /*
485          * Trim the packet down to data size
486          */
487         len -= UDP_UDPHDR_SZ;
488         switch (version) {
489                 case V4:
490                         bp = trimblock(bp, UDP4_IPHDR_SZ + UDP_UDPHDR_SZ, len);
491                         break;
492                 case V6:
493                         bp = trimblock(bp, UDP6_IPHDR_SZ + UDP_UDPHDR_SZ, len);
494                         break;
495                 default:
496                         bp = NULL;
497                         panic("udpiput4: version %d", version);
498         }
499         if (bp == NULL) {
500                 qunlock(&c->qlock);
501                 netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport,
502                            laddr, lport);
503                 upriv->lenerr++;
504                 return;
505         }
506
507         netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport,
508                    laddr, lport, len);
509
510         switch (ucb->headers) {
511                 case 7:
512                         /* pass the src address */
513                         bp = padblock(bp, UDP_USEAD7);
514                         p = bp->rp;
515                         ipmove(p, raddr);
516                         p += IPaddrlen;
517                         ipmove(p, laddr);
518                         p += IPaddrlen;
519                         ipmove(p, ifc->lifc->local);
520                         p += IPaddrlen;
521                         hnputs(p, rport);
522                         p += 2;
523                         hnputs(p, lport);
524                         break;
525                 case 6:
526                         /* pass the src address */
527                         bp = padblock(bp, UDP_USEAD6);
528                         p = bp->rp;
529                         ipmove(p, raddr);
530                         p += IPaddrlen;
531                         ipmove(p, ipforme(f, laddr) == Runi ? laddr : ifc->lifc->local);
532                         p += IPaddrlen;
533                         hnputs(p, rport);
534                         p += 2;
535                         hnputs(p, lport);
536                         break;
537         }
538
539         if (bp->next)
540                 bp = concatblock(bp);
541
542         if (qfull(c->rq)) {
543                 qunlock(&c->qlock);
544                 netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
545                            laddr, lport);
546                 freeblist(bp);
547                 return;
548         }
549
550         qpass(c->rq, bp);
551         qunlock(&c->qlock);
552
553 }
554
555 char *udpctl(struct conv *c, char **f, int n)
556 {
557         Udpcb *ucb;
558
559         ucb = (Udpcb *) c->ptcl;
560         if (n == 1) {
561                 if (strcmp(f[0], "oldheaders") == 0) {
562                         ucb->headers = 6;
563                         return NULL;
564                 } else if (strcmp(f[0], "headers") == 0) {
565                         ucb->headers = 7;
566                         return NULL;
567                 }
568         }
569         return "unknown control request";
570 }
571
572 void udpadvise(struct Proto *udp, struct block *bp, char *msg)
573 {
574         Udp4hdr *h4;
575         Udp6hdr *h6;
576         uint8_t source[IPaddrlen], dest[IPaddrlen];
577         uint16_t psource, pdest;
578         struct conv *s, **p;
579         int version;
580
581         h4 = (Udp4hdr *) (bp->rp);
582         version = ((h4->vihl & 0xF0) == IP_VER6) ? V6 : V4;
583
584         switch (version) {
585                 case V4:
586                         v4tov6(dest, h4->udpdst);
587                         v4tov6(source, h4->udpsrc);
588                         psource = nhgets(h4->udpsport);
589                         pdest = nhgets(h4->udpdport);
590                         break;
591                 case V6:
592                         h6 = (Udp6hdr *) (bp->rp);
593                         ipmove(dest, h6->udpdst);
594                         ipmove(source, h6->udpsrc);
595                         psource = nhgets(h6->udpsport);
596                         pdest = nhgets(h6->udpdport);
597                         break;
598                 default:
599                         panic("udpadvise: version %d", version);
600                         return; /* to avoid a warning */
601         }
602
603         /* Look for a connection */
604         qlock(&udp->qlock);
605         for (p = udp->conv; *p; p++) {
606                 s = *p;
607                 if (s->rport == pdest)
608                         if (s->lport == psource)
609                                 if (ipcmp(s->raddr, dest) == 0)
610                                         if (ipcmp(s->laddr, source) == 0) {
611                                                 if (s->ignoreadvice)
612                                                         break;
613                                                 qlock(&s->qlock);
614                                                 qunlock(&udp->qlock);
615                                                 qhangup(s->rq, msg);
616                                                 qhangup(s->wq, msg);
617                                                 qunlock(&s->qlock);
618                                                 freeblist(bp);
619                                                 return;
620                                         }
621         }
622         qunlock(&udp->qlock);
623         freeblist(bp);
624 }
625
626 int udpstats(struct Proto *udp, char *buf, int len)
627 {
628         Udppriv *upriv;
629
630         upriv = udp->priv;
631         return snprintf(buf, len,
632                                         "InDatagrams: %lu\nNoPorts: %lu\nInErrors: %lu\nOutDatagrams: %lu\n",
633                                         upriv->ustats.udpInDatagrams, upriv->ustats.udpNoPorts,
634                                         upriv->ustats.udpInErrors, upriv->ustats.udpOutDatagrams);
635 }
636
637 void udpnewconv(struct Proto *udp, struct conv *conv)
638 {
639         /* Fsprotoclone alloc'd our priv struct and attached it to conv already.
640          * Now we need to init it */
641         struct Udpcb *ucb = (struct Udpcb *)conv->ptcl;
642         qlock_init(&ucb->qlock);
643 }
644
645 void udpinit(struct Fs *fs)
646 {
647         struct Proto *udp;
648
649         udp = kzmalloc(sizeof(struct Proto), 0);
650         udp->priv = kzmalloc(sizeof(Udppriv), 0);
651         udp->name = "udp";
652         udp->connect = udpconnect;
653         udp->announce = udpannounce;
654         udp->ctl = udpctl;
655         udp->state = udpstate;
656         udp->create = udpcreate;
657         udp->close = udpclose;
658         udp->rcv = udpiput;
659         udp->advise = udpadvise;
660         udp->stats = udpstats;
661         udp->ipproto = IP_UDPPROTO;
662         udp->nc = Nchans;
663         udp->newconv = udpnewconv;
664         udp->ptclsize = sizeof(Udpcb);
665
666         Fsproto(fs, udp);
667 }