mlx4: Support TSO/GSO/LSO
diff --git a/kern/drivers/net/mlx4/en_tx.c b/kern/drivers/net/mlx4/en_tx.c
index 0251e7a..58b1a75 100644
@@ -44,9 +44,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
        int tmp;
        int err;
 
-       ring = kzalloc_node(sizeof(*ring), KMALLOC_WAIT, node);
+       ring = kzalloc_node(sizeof(*ring), MEM_WAIT, node);
        if (!ring) {
-               ring = kzmalloc(sizeof(*ring), KMALLOC_WAIT);
+               ring = kzmalloc(sizeof(*ring), MEM_WAIT);
                if (!ring) {
                        en_err(priv, "Failed allocating TX ring\n");
                        return -ENOMEM;
@@ -58,7 +58,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
        ring->stride = stride;
 
        tmp = size * sizeof(struct mlx4_en_tx_info);
-       ring->tx_info = kmalloc_node(tmp, KMALLOC_WAIT | __GFP_NOWARN, node);
+       ring->tx_info = kmalloc_node(tmp, MEM_WAIT | __GFP_NOWARN, node);
        if (!ring->tx_info) {
                ring->tx_info = vmalloc(tmp);
                if (!ring->tx_info) {
@@ -70,9 +70,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
        en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
                 ring->tx_info, tmp);
 
-       ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, KMALLOC_WAIT, node);
+       ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, MEM_WAIT, node);
        if (!ring->bounce_buf) {
-               ring->bounce_buf = kmalloc(MAX_DESC_SIZE, KMALLOC_WAIT);
+               ring->bounce_buf = kmalloc(MAX_DESC_SIZE, MEM_WAIT);
                if (!ring->bounce_buf) {
                        err = -ENOMEM;
                        goto err_info;
@@ -109,7 +109,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
                goto err_map;
        }
 
-       err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, KMALLOC_WAIT);
+       err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, MEM_WAIT);
        if (err) {
                en_err(priv, "Failed allocating qp %d\n", ring->qpn);
                goto err_reserve;
@@ -718,6 +718,16 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src,
 }
 #endif
 
+/* Akaros's TCP stack always puts the TCP header in the block main body, so we
+ * don't need to worry about those being in extra_data.  Linux stores the
+ * location of the transport header and whatnot in the SKB - we do the same. */
+static size_t get_lso_hdr_size(struct block *block)
+{
+       if (!(block->flag & Btso))
+               return 0;
+       return block->transport_header_end;
+}
+
 netdev_tx_t mlx4_send_packet(struct block *block, struct ether *dev)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
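
For context, a minimal sketch of how a sender could hand this driver an LSO
block, assuming the struct block fields the patch relies on (flag, mss,
transport_header_end).  The helper name and signature are hypothetical and not
part of the patch:

    /* Hypothetical helper: mark a block for TSO using the fields that
     * get_lso_hdr_size() above reads.  hdr_len is the combined length of the
     * ethernet, IP, and TCP headers, which must sit in the block main body. */
    static void block_request_tso(struct block *b, uint16_t mss, size_t hdr_len)
    {
            b->flag |= Btso;
            b->mss = mss;
            b->transport_header_end = hdr_len;
    }
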
@@ -730,16 +740,39 @@ netdev_tx_t mlx4_send_packet(struct block *block, struct ether *dev)
        int real_size;
        uint32_t index;
        __be32 op_own;
+       int i_frag;
+       int nr_frags = 0;
        bool bounce = false;
        dma_addr_t dma = 0;
        uint32_t byte_count = 0;
+       size_t lso_header_size;
+       /* Linear: the block main body has payload to send beyond the headers. */
+       bool is_linear;
 
        if (!priv->port_up)
                goto tx_drop;
 
        ring = priv->tx_ring[0]; /* TODO multi-queue support */
 
-       real_size = CTRL_SIZE + DS_SIZE;
+       lso_header_size = get_lso_hdr_size(block);
+       for (i_frag = 0; i_frag < block->nr_extra_bufs; i_frag++) {
+               const struct extra_bdata *ebd;
+
+               ebd = &block->extra_data[i_frag];
+               if (ebd->base && ebd->len > 0)
+                       nr_frags++;
+       }
+       /* Transport stack should always put the packet headers in the main body. */
+       assert(!(lso_header_size > BHLEN(block)));
+       /* Equality means the block main body holds nothing but the headers,
+        * in which case we won't need an extra data_seg for it. */
+       is_linear = lso_header_size < BHLEN(block);
+
+       real_size = CTRL_SIZE + nr_frags * DS_SIZE;
+       if (is_linear)
+               real_size += DS_SIZE;
+       if (lso_header_size)
+               real_size += ALIGN(lso_header_size + 4, DS_SIZE);
        if (unlikely(!real_size))
                goto tx_drop;
 
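
As a sanity check of the size arithmetic above, a worked example, assuming the
usual mlx4 WQE segment sizes (CTRL_SIZE == 16, DS_SIZE == 16, TXBB_SIZE == 64);
the numbers are illustrative, not taken from the patch.  For a TSO block with
54 bytes of eth+IP+TCP headers, extra payload in the main body (is_linear), and
three populated extra_data fragments:

    real_size = CTRL_SIZE              =  16   ctrl segment
              + ALIGN(54 + 4, DS_SIZE) =  64   inline LSO header prefix
              + DS_SIZE                =  16   data seg for the linear payload
              + 3 * DS_SIZE            =  48   one data seg per fragment
                                       = 144 bytes,

which gets rounded up to whole TXBBs (nr_txbb, three here) before the
descriptor is built.
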
@@ -768,23 +801,51 @@ netdev_tx_t mlx4_send_packet(struct block *block, struct ether *dev)
        tx_info->nr_txbb = nr_txbb;
 
        data = &tx_desc->data;
+       if (lso_header_size)
+               data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4,
+                                                     DS_SIZE));
 
        /* valid only for none inline segments */
        tx_info->data_offset = (void *)data - (void *)tx_desc;
        tx_info->inl = 0;
-       tx_info->linear = 1;
-       tx_info->nr_maps = 1;
+       tx_info->linear = is_linear ? 1 : 0;
+       tx_info->nr_maps = nr_frags + tx_info->linear;
+       data += tx_info->nr_maps - 1;
 
-       byte_count = BLEN(block);
+       /* Map fragments if any */
+       for (i_frag = block->nr_extra_bufs - 1; i_frag >= 0; i_frag--) {
+               const struct extra_bdata *ebd;
 
-       dma = dma_map_single(0, block->rp, BLEN(block), DMA_TO_DEVICE);
-       if (unlikely(dma_mapping_error(0, dma)))
-               goto tx_drop_unmap;
+               ebd = &block->extra_data[i_frag];
+               if (!ebd->base || ebd->len <= 0)
+                       continue;
 
-       data->addr = cpu_to_be64(dma);
-       data->lkey = ring->mr_key;
-       bus_wmb();
-       data->byte_count = cpu_to_be32(byte_count);
+               byte_count = ebd->len;
+               dma = dma_map_single(0, (void *)(ebd->base + ebd->off),
+                                    byte_count, DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(0, dma)))
+                       goto tx_drop_unmap;
+
+               data->addr = cpu_to_be64(dma);
+               data->lkey = ring->mr_key;
+               bus_wmb();
+               data->byte_count = cpu_to_be32(byte_count);
+               --data;
+       }
+
+       if (tx_info->linear) {
+               byte_count = BHLEN(block) - lso_header_size;
+
+               dma = dma_map_single(0, block->rp + lso_header_size, byte_count,
+                                    DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(0, dma)))
+                       goto tx_drop_unmap;
+
+               data->addr = cpu_to_be64(dma);
+               data->lkey = ring->mr_key;
+               bus_wmb();
+               data->byte_count = cpu_to_be32(byte_count);
+       }
 
        /* tx completion can avoid cache line miss for common cases */
        tx_info->map0_dma = dma;
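
The data segments are filled back to front: data initially points at the last
slot (data += nr_maps - 1), the loop walks extra_data in reverse and decrements
data, and the linear segment, if any, is written last into the first slot.  As
in the Linux driver, the bus_wmb() before each byte_count write keeps the
address and key visible before the length, so the device never sees a segment
with a valid byte count but a stale address.  Under the same assumptions as the
example above, the finished LSO WQE ends up in send order:

    ctrl seg                                    CTRL_SIZE
    lso seg (mss_hdr_size + inline headers)     ALIGN(lso_header_size + 4, DS_SIZE)
    data seg (linear main-body payload)         DS_SIZE, only if is_linear
    data segs (extra_data fragments, in order)  DS_SIZE each
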
@@ -798,6 +859,12 @@ netdev_tx_t mlx4_send_packet(struct block *block, struct ether *dev)
 
        /* Prepare ctrl segement apart opcode+ownership */
        tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+       if (likely(block->flag & BCKSUM_FLAGS)) {
+               assert(block->flag & (Budpck | Btcpck));
+               tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
+                                                        MLX4_WQE_CTRL_TCP_UDP_CSUM);
+               ring->tx_csum++;
+       }
 
        if (priv->flags & MLX4_EN_FLAG_ENABLE_HW_LOOPBACK) {
                struct ethhdr *ethh;
@@ -810,12 +877,39 @@ netdev_tx_t mlx4_send_packet(struct block *block, struct ether *dev)
                tx_desc->ctrl.imm = get_unaligned((__be32 *)(ethh->h_dest + 2));
        }
 
-       /* Normal (Non LSO) packet */
-       op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
-               ((ring->prod & ring->size) ?
-                cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
-       tx_info->nr_bytes = MAX_T(unsigned int, BLEN(block), ETH_ZLEN);
-       ring->packets++;
+       /* Handle LSO (TSO) packets */
+       if (lso_header_size) {
+               int i;
+
+               /* Mark opcode as LSO */
+               op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
+                       ((ring->prod & ring->size) ?
+                               cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+
+               /* Fill in the LSO prefix */
+               tx_desc->lso.mss_hdr_size = cpu_to_be32(block->mss << 16 |
+                                                       lso_header_size);
+
+               /* Copy the headers; the assert above guarantees that all
+                * lso_header_size bytes live in the block main body, starting
+                * at block->rp. */
+               memcpy(tx_desc->lso.header, block->rp, lso_header_size);
+
+               ring->tso_packets++;
+
+               i = ((BLEN(block) - lso_header_size) / block->mss) +
+                       !!((BLEN(block) - lso_header_size) % block->mss);
+               tx_info->nr_bytes = BLEN(block) + (i - 1) * lso_header_size;
+               ring->packets += i;
+       } else {
+
+               /* Normal (Non LSO) packet */
+               op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
+                       ((ring->prod & ring->size) ?
+                        cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+               tx_info->nr_bytes = MAX_T(unsigned int, BLEN(block), ETH_ZLEN);
+               ring->packets++;
+       }
        ring->bytes += tx_info->nr_bytes;
        AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, BLEN(block));
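
For reference, the accounting in the LSO branch above counts what actually hits
the wire: the device replicates the headers in front of every segment it cuts,
so nr_bytes is BLEN(block) plus one extra copy of the headers per additional
segment.  A sketch mirroring that arithmetic (the helper is hypothetical, not
part of the driver):

    /* Hypothetical: wire bytes for an LSO block.  E.g. a 9014-byte block with
     * 54 header bytes and an mss of 1460 becomes ceil(8960 / 1460) = 7
     * segments, i.e. 9014 + 6 * 54 = 9338 bytes on the wire. */
    static size_t lso_wire_bytes(size_t blen, size_t hdr_size, uint32_t mss)
    {
            size_t segs = (blen - hdr_size + mss - 1) / mss;

            return blen + (segs - 1) * hdr_size;
    }
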