/* akaros/kern/src/net/icmp.c */
   1/* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
   2 * Portions Copyright © 1997-1999 Vita Nuova Limited
   3 * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
   4 *                                (www.vitanuova.com)
   5 * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
   6 *
   7 * Modified for the Akaros operating system:
   8 * Copyright (c) 2013-2014 The Regents of the University of California
   9 * Copyright (c) 2013-2015 Google Inc.
  10 *
  11 * Permission is hereby granted, free of charge, to any person obtaining a copy
  12 * of this software and associated documentation files (the "Software"), to deal
  13 * in the Software without restriction, including without limitation the rights
  14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15 * copies of the Software, and to permit persons to whom the Software is
  16 * furnished to do so, subject to the following conditions:
  17 *
  18 * The above copyright notice and this permission notice shall be included in
  19 * all copies or substantial portions of the Software.
  20 *
  21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  27 * SOFTWARE. */
  28
  29#include <slab.h>
  30#include <kmalloc.h>
  31#include <kref.h>
  32#include <string.h>
  33#include <stdio.h>
  34#include <assert.h>
  35#include <error.h>
  36#include <cpio.h>
  37#include <pmap.h>
  38#include <smp.h>
  39#include <net/ip.h>
  40
  41#include <slab.h>
  42#include <kmalloc.h>
  43#include <kref.h>
  44#include <string.h>
  45#include <stdio.h>
  46#include <assert.h>
  47#include <error.h>
  48#include <cpio.h>
  49#include <pmap.h>
  50#include <smp.h>
  51#include <net/ip.h>
  52
/*
 * Layout of an IPv4 packet carrying ICMP, overlaid directly on block data
 * (this file casts bp->rp to Icmp *).  The fields up to and including dst
 * occupy ICMP_IPSIZE (20) bytes; type through seq occupy ICMP_HDRSIZE (8)
 * bytes.
 */
typedef struct Icmp {
        uint8_t vihl;                           /* Version and header length */
        uint8_t tos;                            /* Type of service */
        uint8_t length[2];                      /* packet length */
        uint8_t id[2];                          /* Identification */
        uint8_t frag[2];                        /* Fragment information */
        uint8_t ttl;                            /* Time to live */
        uint8_t proto;                          /* Protocol */
        uint8_t ipcksum[2];                     /* Header checksum */
        uint8_t src[4];                         /* Ip source */
        uint8_t dst[4];                         /* Ip destination */
        uint8_t type;                           /* ICMP packet type (see Packet Types enum) */
        uint8_t code;                           /* Type-specific code */
        uint8_t cksum[2];                       /* Checksum, filled in from ptclcsum() */
        uint8_t icmpid[2];                      /* ID; compared with a conv's lport */
        uint8_t seq[2];                         /* Sequence number */
        uint8_t data[1];                        /* First byte of the payload */
} Icmp;
  71
/*
 * ICMP message type numbers, as compared against the 'type' field of
 * struct Icmp.  Maxtype is the largest value listed; it sizes icmpnames[]
 * and the per-type in/out counters of struct Icmppriv.
 */
enum {                                          /* Packet Types */
        EchoReply = 0,
        Unreachable = 3,
        SrcQuench = 4,
        Redirect = 5,
        EchoRequest = 8,
        TimeExceed = 11,
        InParmProblem = 12,
        Timestamp = 13,
        TimestampReply = 14,
        InfoRequest = 15,
        InfoReply = 16,
        AddrMaskRequest = 17,
        AddrMaskReply = 18,

        Maxtype = 18,
};
  89
/*
 * icmpiput() refuses to hand an Unreachable/TimeExceed message to another
 * protocol's advise hook when the quoted packet is shorter than this many
 * bytes.
 */
enum {
        MinAdvise = 24, /* minimum needed for us to advise another protocol */
};
  93
  94char *icmpnames[Maxtype + 1] = {
  95        [EchoReply] "EchoReply",
  96        [Unreachable] "Unreachable",
  97        [SrcQuench] "SrcQuench",
  98        [Redirect] "Redirect",
  99        [EchoRequest] "EchoRequest",
 100        [TimeExceed] "TimeExceed",
 101        [InParmProblem] "InParmProblem",
 102        [Timestamp] "Timestamp",
 103        [TimestampReply] "TimestampReply",
 104        [InfoRequest] "InfoRequest",
 105        [InfoReply] "InfoReply",
 106        [AddrMaskRequest] "AddrMaskRequest",
 107        [AddrMaskReply] "AddrMaskReply  ",
 108};
 109
/*
 * Protocol number and header sizes used throughout this file.
 */
enum {
        IP_ICMPPROTO = 1,       /* stored in Icmp.proto and Proto.ipproto */
        ICMP_IPSIZE = 20,       /* bytes of struct Icmp that precede 'type' */
        ICMP_HDRSIZE = 8,       /* bytes from 'type' through 'seq' */
};
 115
/*
 * Indices into Icmppriv.stats[] and statnames[].  Nstats is the number of
 * counters and must stay last.
 */
enum {
        InMsgs,
        InErrors,
        OutMsgs,
        CsumErrs,
        LenErrs,
        HlenErrs,

        Nstats,
};
 126
 127static char *statnames[Nstats] = {
 128        [InMsgs] "InMsgs",
 129        [InErrors] "InErrors",
 130        [OutMsgs] "OutMsgs",
 131        [CsumErrs] "CsumErrs",
 132        [LenErrs] "LenErrs",
 133        [HlenErrs] "HlenErrs",
 134};
 135
/*
 * Per-protocol private state, allocated in icmpinit() and reached through
 * Proto.priv.  Holds only counters.
 */
typedef struct Icmppriv Icmppriv;
struct Icmppriv {
        /* general counters, indexed by the InMsgs..HlenErrs enum */
        uint32_t stats[Nstats];

        /* message counts */
        uint32_t in[Maxtype + 1];       /* received, indexed by ICMP type */
        uint32_t out[Maxtype + 1];      /* sent, indexed by ICMP type */
};
 144
 145static void icmpkick(void *x, struct block *);
 146
 147static void icmpcreate(struct conv *c)
 148{
 149        c->rq = qopen(64 * 1024, Qmsg, 0, c);
 150        c->wq = qbypass(icmpkick, c);
 151}
 152
 153void icmpconnect(struct conv *c, char **argv, int argc)
 154{
 155        Fsstdconnect(c, argv, argc);
 156        Fsconnected(c, 0);
 157}
 158
 159extern int icmpstate(struct conv *c, char *state, int n)
 160{
 161        return snprintf(state, n, "%s qin %d qout %d\n", "Datagram",
 162                        c->rq ? qlen(c->rq) : 0, c->wq ? qlen(c->wq) : 0);
 163}
 164
/*
 * Announce hook (Proto.announce): run the standard announce processing on
 * the ctl arguments, then call Fsconnected() with a NULL message.
 */
void icmpannounce(struct conv *c, char **argv, int argc)
{
        Fsstdannounce(c, argv, argc);
        Fsconnected(c, NULL);
}
 170
/*
 * Close hook (Proto.close): close both queues of the conversation and reset
 * its addressing so it no longer matches incoming packets.
 */
extern void icmpclose(struct conv *c)
{
        qclose(c->rq);
        qclose(c->wq);
        /* forget both endpoints and the local port (the ICMP id) */
        ipmove(c->laddr, IPnoaddr);
        ipmove(c->raddr, IPnoaddr);
        c->lport = 0;
}
 179
 180static void icmpkick(void *x, struct block *bp)
 181{
 182        struct conv *c = x;
 183        Icmp *p;
 184        Icmppriv *ipriv;
 185
 186        if (bp == NULL)
 187                return;
 188
 189        bp = pullupblock(bp, ICMP_IPSIZE + ICMP_HDRSIZE);
 190        if (bp == 0)
 191                return;
 192        p = (Icmp *) (bp->rp);
 193        p->vihl = IP_VER4;
 194        ipriv = c->p->priv;
 195        if (p->type <= Maxtype)
 196                ipriv->out[p->type]++;
 197
 198        v6tov4(p->dst, c->raddr);
 199        v6tov4(p->src, c->laddr);
 200        p->proto = IP_ICMPPROTO;
 201        hnputs(p->icmpid, c->lport);
 202        memset(p->cksum, 0, sizeof(p->cksum));
 203        hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
 204        ipriv->stats[OutMsgs]++;
 205        netlog(c->p->f, Logicmp,
 206               "icmp output: Type %s (%d,%d), To %V, TTL %d, ID %d, SEQ %d\n",
 207               icmpnames[MIN(p->type, Maxtype)], p->type, p->code, p->dst,
 208               p->ttl, nhgets(p->icmpid), nhgets(p->seq));
 209        ipoput4(c->p->f, bp, 0, c->ttl, c->tos, NULL);
 210}
 211
 212extern void icmpttlexceeded(struct Fs *f, uint8_t * ia, struct block *bp)
 213{
 214        struct block *nbp;
 215        Icmp *p, *np;
 216
 217        p = (Icmp *) bp->rp;
 218
 219        netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
 220        nbp = block_alloc(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8,
 221                          MEM_WAIT);
 222        nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
 223        np = (Icmp *) nbp->rp;
 224        np->vihl = IP_VER4;
 225        memmove(np->dst, p->src, sizeof(np->dst));
 226        v6tov4(np->src, ia);
 227        memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
 228        np->type = TimeExceed;
 229        np->code = 0;
 230        np->proto = IP_ICMPPROTO;
 231        hnputs(np->icmpid, 0);
 232        hnputs(np->seq, 0);
 233        memset(np->cksum, 0, sizeof(np->cksum));
 234        hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE,
 235                                   blocklen(nbp) - ICMP_IPSIZE));
 236        ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, NULL);
 237
 238}
 239
 240static void icmpunreachable(struct Fs *f, struct block *bp, int code, int seq)
 241{
 242        struct block *nbp;
 243        Icmp *p, *np;
 244        int i;
 245        uint8_t addr[IPaddrlen];
 246
 247        p = (Icmp *) bp->rp;
 248
 249        /* only do this for unicast sources and destinations */
 250        v4tov6(addr, p->dst);
 251        i = ipforme(f, addr);
 252        if ((i & Runi) == 0)
 253                return;
 254        v4tov6(addr, p->src);
 255        i = ipforme(f, addr);
 256        if (i != 0 && (i & Runi) == 0)
 257                return;
 258
 259        /* TODO: Clean this up or remove it.  This is for things like UDP port
 260         * unreachable.  But we might not be UDP, due to how the code is built.
 261         * Check the UDP netlog if you see this. */
 262        netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
 263        nbp = block_alloc(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8,
 264                          MEM_WAIT);
 265        nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
 266        np = (Icmp *) nbp->rp;
 267        np->vihl = IP_VER4;
 268        memmove(np->dst, p->src, sizeof(np->dst));
 269        memmove(np->src, p->dst, sizeof(np->src));
 270        memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
 271        np->type = Unreachable;
 272        np->code = code;
 273        np->proto = IP_ICMPPROTO;
 274        hnputs(np->icmpid, 0);
 275        hnputs(np->seq, seq);
 276        memset(np->cksum, 0, sizeof(np->cksum));
 277        hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE,
 278                                   blocklen(nbp) - ICMP_IPSIZE));
 279        ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, NULL);
 280}
 281
 282extern void icmpnoconv(struct Fs *f, struct block *bp)
 283{
 284        icmpunreachable(f, bp, 3, 0);
 285}
 286
 287extern void icmpcantfrag(struct Fs *f, struct block *bp, int mtu)
 288{
 289        icmpunreachable(f, bp, 4, mtu);
 290}
 291
 292static void goticmpkt(struct Proto *icmp, struct block *bp)
 293{
 294        struct conv **c, *s;
 295        Icmp *p;
 296        uint8_t dst[IPaddrlen];
 297        uint16_t recid;
 298
 299        p = (Icmp *) bp->rp;
 300        v4tov6(dst, p->src);
 301        recid = nhgets(p->icmpid);
 302
 303        for (c = icmp->conv; *c; c++) {
 304                s = *c;
 305                if (s->lport == recid)
 306                        if (ipcmp(s->raddr, dst) == 0) {
 307                                bp = concatblock(bp);
 308                                if (bp != NULL)
 309                                        qpass(s->rq, bp);
 310                                return;
 311                        }
 312        }
 313        freeblist(bp);
 314}
 315
 316static struct block *mkechoreply(struct Proto *icmp, struct block *bp)
 317{
 318        Icmp *q;
 319        uint8_t ip[4];
 320
 321        /* we're repurposing bp to send it back out.  we need to remove any
 322         * inbound checksum flags (which were saying the HW did the checksum)
 323         * and any other metadata.  We might need to fill in some of the
 324         * metadata too. */
 325        block_reset_metadata(bp);
 326        q = (Icmp *) bp->rp;
 327        q->vihl = IP_VER4;
 328        memmove(ip, q->src, sizeof(q->dst));
 329        memmove(q->src, q->dst, sizeof(q->src));
 330        memmove(q->dst, ip, sizeof(q->dst));
 331        q->type = EchoReply;
 332        memset(q->cksum, 0, sizeof(q->cksum));
 333        hnputs(q->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
 334        netlog(icmp->f, Logicmp,
 335               "icmp echo reply: To %V, TTL %d, ID %d, SEQ %d\n",
 336               q->dst, q->ttl, nhgets(q->icmpid), nhgets(q->seq));
 337        return bp;
 338}
 339
 340static char *unreachcode[] = {
 341        [0] "net unreachable",
 342        [1] "host unreachable",
 343        [2] "protocol unreachable",
 344        [3] "port unreachable",
 345        [4] "fragmentation needed and DF set",
 346        [5] "source route failed",
 347};
 348
 349static void icmpiput(struct Proto *icmp, struct Ipifc *unused, struct block *bp)
 350{
 351        int n, iplen;
 352        Icmp *p;
 353        struct block *r;
 354        struct Proto *pr;
 355        char *msg;
 356        char m2[128];
 357        Icmppriv *ipriv;
 358
 359        bp = pullupblock(bp, ICMP_IPSIZE + ICMP_HDRSIZE);
 360        if (bp == NULL)
 361                return;
 362
 363        ipriv = icmp->priv;
 364
 365        ipriv->stats[InMsgs]++;
 366
 367        p = (Icmp *) bp->rp;
 368        /* The ID and SEQ are only for Echo Request and Reply, but close enough.
 369         */
 370        netlog(icmp->f, Logicmp,
 371               "icmp input: Type %s (%d,%d), From %V, TTL %d, ID %d, SEQ %d\n",
 372               icmpnames[MIN(p->type, Maxtype)], p->type, p->code, p->src,
 373               p->ttl, nhgets(p->icmpid), nhgets(p->seq));
 374        n = blocklen(bp);
 375        if (n < ICMP_IPSIZE + ICMP_HDRSIZE) {
 376                /* pullupblock should fail if dlen < size.  b->len >= b->dlen */
 377                panic("We did a pullupblock and thought we had enough!");
 378                ipriv->stats[InErrors]++;
 379                ipriv->stats[HlenErrs]++;
 380                netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
 381                goto raise;
 382        }
 383        iplen = nhgets(p->length);
 384        if (iplen > n || (iplen % 1)) {
 385                ipriv->stats[LenErrs]++;
 386                ipriv->stats[InErrors]++;
 387                netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
 388                goto raise;
 389        }
 390        if (ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)) {
 391                ipriv->stats[InErrors]++;
 392                ipriv->stats[CsumErrs]++;
 393                netlog(icmp->f, Logicmp, "icmp checksum error\n");
 394                goto raise;
 395        }
 396        if (p->type <= Maxtype)
 397                ipriv->in[p->type]++;
 398
 399        switch (p->type) {
 400        case EchoRequest:
 401                if (iplen < n)
 402                        bp = trimblock(bp, 0, iplen);
 403                r = mkechoreply(icmp, bp);
 404                ipriv->out[EchoReply]++;
 405                ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, NULL);
 406                break;
 407        case Unreachable:
 408                if (p->code > 5)
 409                        msg = unreachcode[1];
 410                else
 411                        msg = unreachcode[p->code];
 412
 413                bp->rp += ICMP_IPSIZE + ICMP_HDRSIZE;
 414                if (blocklen(bp) < MinAdvise) {
 415                        ipriv->stats[LenErrs]++;
 416                        goto raise;
 417                }
 418                p = (Icmp *) bp->rp;
 419                pr = Fsrcvpcolx(icmp->f, p->proto);
 420                if (pr != NULL && pr->advise != NULL) {
 421                        (*pr->advise) (pr, bp, msg);
 422                        return;
 423                }
 424
 425                bp->rp -= ICMP_IPSIZE + ICMP_HDRSIZE;
 426                goticmpkt(icmp, bp);
 427                break;
 428        case TimeExceed:
 429                if (p->code == 0) {
 430                        snprintf(m2, sizeof(m2), "ttl exceeded at %V", p->src);
 431
 432                        bp->rp += ICMP_IPSIZE + ICMP_HDRSIZE;
 433                        if (blocklen(bp) < MinAdvise) {
 434                                ipriv->stats[LenErrs]++;
 435                                goto raise;
 436                        }
 437                        p = (Icmp *) bp->rp;
 438                        pr = Fsrcvpcolx(icmp->f, p->proto);
 439                        if (pr != NULL && pr->advise != NULL) {
 440                                (*pr->advise) (pr, bp, m2);
 441                                return;
 442                        }
 443                        bp->rp -= ICMP_IPSIZE + ICMP_HDRSIZE;
 444                }
 445
 446                goticmpkt(icmp, bp);
 447                break;
 448        default:
 449                goticmpkt(icmp, bp);
 450                break;
 451        }
 452        return;
 453
 454raise:
 455        freeblist(bp);
 456}
 457
 458void icmpadvise(struct Proto *icmp, struct block *bp, char *msg)
 459{
 460        struct conv **c, *s;
 461        Icmp *p;
 462        uint8_t dst[IPaddrlen];
 463        uint16_t recid;
 464
 465        p = (Icmp *) bp->rp;
 466        v4tov6(dst, p->dst);
 467        recid = nhgets(p->icmpid);
 468
 469        for (c = icmp->conv; *c; c++) {
 470                s = *c;
 471                if (s->lport == recid)
 472                        if (ipcmp(s->raddr, dst) == 0) {
 473                                qhangup(s->rq, msg);
 474                                qhangup(s->wq, msg);
 475                                break;
 476                        }
 477        }
 478        freeblist(bp);
 479}
 480
 481int icmpstats(struct Proto *icmp, char *buf, int len)
 482{
 483        Icmppriv *priv;
 484        char *p, *e;
 485        int i;
 486
 487        priv = icmp->priv;
 488        p = buf;
 489        e = p + len;
 490        for (i = 0; i < Nstats; i++)
 491                p = seprintf(p, e, "%s: %u\n", statnames[i], priv->stats[i]);
 492        for (i = 0; i <= Maxtype; i++) {
 493                if (icmpnames[i])
 494                        p = seprintf(p, e, "%s: %u %u\n", icmpnames[i],
 495                                     priv->in[i], priv->out[i]);
 496                else
 497                        p = seprintf(p, e, "%d: %u %u\n", i, priv->in[i],
 498                                     priv->out[i]);
 499        }
 500        return p - buf;
 501}
 502
 503void icmpinit(struct Fs *fs)
 504{
 505        struct Proto *icmp;
 506
 507        icmp = kzmalloc(sizeof(struct Proto), 0);
 508        icmp->priv = kzmalloc(sizeof(Icmppriv), 0);
 509        icmp->name = "icmp";
 510        icmp->connect = icmpconnect;
 511        icmp->announce = icmpannounce;
 512        icmp->state = icmpstate;
 513        icmp->create = icmpcreate;
 514        icmp->close = icmpclose;
 515        icmp->rcv = icmpiput;
 516        icmp->stats = icmpstats;
 517        icmp->ctl = NULL;
 518        icmp->advise = icmpadvise;
 519        icmp->gc = NULL;
 520        icmp->ipproto = IP_ICMPPROTO;
 521        icmp->nc = 128;
 522        icmp->ptclsize = 0;
 523
 524        Fsproto(fs, icmp);
 525}
 526