Implement transmit checksum offload.
author Andrew Gallatin <gallatin@google.com>
Thu, 19 Jun 2014 19:29:11 +0000 (12:29 -0700)
committer Andrew Gallatin <gallatin@google.com>
Fri, 20 Jun 2014 16:41:34 +0000 (09:41 -0700)
The basic strategy is to put the pseudo hdr sum into the packet
at the TCP/UDP layer and assume we can do checksum offload.
Then when we know we cannot do checksum offload (ip frags,
sending on ether medium to a device which cannot offload,
or sending on a non-ether medium such as netdev, pkt, or ppp),
we complete the full checksum in software.

I have only implemented hardware checksum offload on
an out-of-tree device.

Reviewed-by: rminnich@gmail.com
Signed-off-by: Andrew Gallatin <gallatin@google.com>
kern/drivers/dev/ether.c
kern/include/ip.h
kern/include/ns.h
kern/src/net/ip.c
kern/src/net/netdevmedium.c
kern/src/net/pktmedium.c
kern/src/net/pppmedium.c
kern/src/net/tcp.c
kern/src/net/udp.c
kern/src/ns/qio.c

index 7c5e1ac..85481c5 100644 (file)
@@ -376,6 +376,8 @@ static int etheroq(struct ether *ether, struct block *bp)
                        hnputs(bp->rp + 2 * Eaddrlen + 2, ether->vlanid & 0xFFF);       /* prio:3 0:1 vid:12 */
                        ether = ether->ctlr;
                }
+
+               ptclcsum_finalize(bp, ether->netif.feat);
                qbwrite(ether->oq, bp);
                if (ether->transmit != NULL)
                        ether->transmit(ether);
index 6ab0fea..2bf7887 100644 (file)
@@ -2,6 +2,7 @@
 
 #ifndef ROS_KERN_IP_H
 #define ROS_KERN_IP_H
+#include <ns.h>
 
 enum {
        Addrlen = 64,
@@ -609,6 +610,20 @@ extern void ip_init(struct Fs *);
 extern void update_mtucache(uint8_t * unused_uint8_p_t, uint32_t);
 extern uint32_t restrict_mtu(uint8_t * unused_uint8_p_t, uint32_t);
 
+static inline void ptclcsum_finalize(struct block *bp, unsigned int feat) /* complete in software any checksum flagged on bp that feat does not cover; callers pass feat == 0 to always complete */
+{
+       unsigned int flag = bp->flag & BCKSUM_FLAGS; /* checksum flags currently set on this block */
+       uint8_t *csum_store;
+
+       if (flag && (flag & feat) != flag) { /* at least one flagged checksum is missing from feat */
+               csum_store = bp->rp + bp->checksum_start + bp->checksum_offset; /* where the checksum field lives */
+               hnputs((uint16_t *)csum_store,
+                      ptclcsum(bp, bp->checksum_start,
+                               BLEN(bp) - bp->checksum_start)); /* length runs from checksum_start to the end of bp's data */
+               bp->flag &= ~BCKSUM_FLAGS; /* checksum is final; clear the flags so it is not recomputed */
+       }
+}
+
 /*
  * bootp.c
  */
@@ -889,6 +904,20 @@ struct netaddr {
 };
 
 /*
+ * These flags overlap with block flags, to make detecting unsupported
+ * offloads efficient.
+ */
+#define NETF_BASE_SHIFT                (NS_SHIFT_MAX + 1)
+#define NETF_PADMIN_SHIFT      (NETF_BASE_SHIFT + 0)
+#define NETF_SG_SHIFT          (NETF_BASE_SHIFT + 1)
+enum {
+       NETF_IPCK = (1 << NS_IPCK_SHIFT),       /* xmit ip checksum; same bit as Bipck */
+       NETF_UDPCK = (1 << NS_UDPCK_SHIFT),     /* xmit udp checksum; same bit as Budpck */
+       NETF_TCPCK = (1 << NS_TCPCK_SHIFT),     /* xmit tcp checksum; same bit as Btcpck */
+       NETF_PADMIN = (1 << NETF_PADMIN_SHIFT), /* device pads to mintu; own bit, distinct from NETF_SG */
+       NETF_SG = (1 << NETF_SG_SHIFT),         /* device can do scatter/gather */
+};
+/*
  *  a network interface
  */
 struct netif {
@@ -904,6 +933,7 @@ struct netif {
        int alen;                                       /* address length */
        int mbps;                                       /* megabits per sec */
        int link;                                       /* link status */
+       unsigned int feat;                              /* dev features */
        uint8_t addr[Nmaxaddr];
        uint8_t bcast[Nmaxaddr];
        struct netaddr *maddr;          /* known multicast addresses */
index 612ff38..bd8a09b 100644 (file)
@@ -314,14 +314,21 @@ enum {
        CAPPEND = 0x0100,       /* append on write */
 };
 
+#define NS_IPCK_SHIFT  2
+#define NS_UDPCK_SHIFT 3
+#define NS_TCPCK_SHIFT 4
+#define NS_PKTCK_SHIFT 5
+#define NS_SHIFT_MAX 5
+
 enum {
        BINTR = (1 << 0),
        BFREE = (1 << 1),
-       Bipck = (1 << 2),       /* ip checksum */
-       Budpck = (1 << 3),      /* udp checksum */
-       Btcpck = (1 << 4),      /* tcp checksum */
-       Bpktck = (1 << 5),      /* packet checksum */
+       Bipck = (1 << NS_IPCK_SHIFT),   /* ip checksum */
+       Budpck = (1 << NS_UDPCK_SHIFT), /* udp checksum */
+       Btcpck = (1 << NS_TCPCK_SHIFT), /* tcp checksum */
+       Bpktck = (1 << NS_PKTCK_SHIFT), /* packet checksum */
 };
+#define BCKSUM_FLAGS (Bipck|Budpck|Btcpck|Bpktck)
 
 struct block {
        struct block *next;
@@ -333,6 +340,8 @@ struct block {
        void (*free) (struct block *);
        uint16_t flag;
        uint16_t checksum;                      /* IP checksum of complete packet (minus media header) */
+       uint16_t checksum_start;                /* off from start of block to start csum */
+       uint16_t checksum_offset;               /* off from checksum_start to store csum */
 };
 #define BLEN(s)        ((s)->wp - (s)->rp)
 #define BALLOC(s) ((s)->lim - (s)->base)
index ee0888f..ab4077b 100644 (file)
@@ -332,6 +332,9 @@ ipoput4(struct Fs *f,
                goto raise;
        }
 
+       /* compute tcp/udp checksum in software before fragmenting */
+       ptclcsum_finalize(bp, 0);
+
        dlen = len - IP4HDR;
        xp = bp;
        if (gating)
index cd2b9f3..199425f 100644 (file)
@@ -96,6 +96,7 @@ netdevbwrite(struct Ipifc *ifc, struct block *bp, int unused_int,
        if (BLEN(bp) < ifc->mintu)
                bp = adjustblock(bp, ifc->mintu);
 
+       ptclcsum_finalize(bp, 0);
        devtab[er->mchan->type].bwrite(er->mchan, bp, 0);
        ifc->out++;
 }
index f98a78b..6143f87 100644 (file)
@@ -56,6 +56,7 @@ pktbwrite(struct Ipifc *ifc, struct block *bp, int unused_int,
 {
        /* enqueue onto the conversation's rq */
        bp = concatblock(bp);
+       ptclcsum_finalize(bp, 0);
        if (atomic_read(&ifc->conv->snoopers) > 0)
                qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
        qpass(ifc->conv->rq, bp);
index d6651c5..56dc2b3 100644 (file)
@@ -158,6 +158,7 @@ static void pppbwrite(Ipifc * ifc, Block * bp, int, uchar *)
 {
        PPP *ppp = ifc->arg;
 
+       ptclcsum_finalize(bp, 0);
        pppwrite(ppp, bp);
        ifc->out++;
 }
index b2bce39..342afa9 100644 (file)
@@ -1063,8 +1063,11 @@ struct block *htontcp4(Tcp * tcph, struct block *data, Tcp4hdr * ph,
        if (tcb != NULL && tcb->nochecksum) {
                h->tcpcksum[0] = h->tcpcksum[1] = 0;
        } else {
-               csum = ptclcsum(data, TCP4_IPLEN, hdrlen + dlen + TCP4_PHDRSIZE);
+               csum = ~ptclcsum(data, TCP4_IPLEN, TCP4_PHDRSIZE);
                hnputs(h->tcpcksum, csum);
+               data->checksum_start = TCP4_IPLEN + TCP4_PHDRSIZE;
+               data->checksum_offset = ph->tcpcksum - ph->tcpsport;
+               data->flag |= Btcpck;
        }
 
        return data;
index ff22719..10fa78b 100644 (file)
@@ -294,8 +294,10 @@ void udpkick(void *x, struct block *bp)
                        uh4->udpcksum[0] = 0;
                        uh4->udpcksum[1] = 0;
                        hnputs(uh4->udpcksum,
-                                  ptclcsum(bp, UDP4_PHDR_OFF,
-                                                       dlen + UDP_UDPHDR_SZ + UDP4_PHDR_SZ));
+                                  ~ptclcsum(bp, UDP4_PHDR_OFF, UDP4_PHDR_SZ));
+                       bp->checksum_start = UDP4_IPHDR_SZ;
+                       bp->checksum_offset = uh4->udpcksum - uh4->udpsport;
+                       bp->flag |= Budpck;
                        uh4->vihl = IP_VER4;
                        ipoput4(f, bp, 0, c->ttl, c->tos, rc);
                        break;
index c8e82db..a3687fa 100644 (file)
@@ -92,10 +92,14 @@ struct block *padblock(struct block *bp, int size)
 {
        int n;
        struct block *nbp;
+       uint8_t bcksum = bp->flag & BCKSUM_FLAGS;
+       uint16_t checksum_start = bp->checksum_start;
+       uint16_t checksum_offset = bp->checksum_offset;
 
        QDEBUG checkb(bp, "padblock 1");
        if (size >= 0) {
                if (bp->rp - bp->base >= size) {
+                       bp->checksum_start += size;
                        bp->rp -= size;
                        return bp;
                }
@@ -127,6 +131,11 @@ struct block *padblock(struct block *bp, int size)
                nbp->wp += n;
                freeb(bp);
        }
+       if (bcksum) {
+               nbp->flag |= bcksum;
+               nbp->checksum_start = checksum_start;
+               nbp->checksum_offset = checksum_offset;
+       }
        QDEBUG checkb(nbp, "padblock 1");
        return nbp;
 }
@@ -306,6 +315,11 @@ struct block *copyblock(struct block *bp, int count)
 
        QDEBUG checkb(bp, "copyblock 0");
        nbp = allocb(count);
+       if (bp->flag & BCKSUM_FLAGS) {
+               nbp->flag |= (bp->flag & BCKSUM_FLAGS);
+               nbp->checksum_start = bp->checksum_start;
+               nbp->checksum_offset = bp->checksum_offset;
+       }
        for (; count > 0 && bp != 0; bp = bp->next) {
                l = BLEN(bp);
                if (l > count)