Provide a shim layer for reference counted pages
author: Barret Rhoden <brho@cs.berkeley.edu>
Tue, 23 Aug 2016 20:54:38 +0000 (16:54 -0400)
committer: Barret Rhoden <brho@cs.berkeley.edu>
Tue, 29 Nov 2016 16:27:40 +0000 (11:27 -0500)
Right now, all pages are reference counted.  I'd like to try to stop doing
that to make contig allocations and maybe jumbo pages easier.  Longer term,
I'd like to get away from having a page struct too, though we'll see.

Some code, specifically mlx4, wants page allocations and to do reference
counting per page.  For that code, we provide this shim.

It actually looks like there are some bugs in mlx4's allocation/freeing
code, and how they account for fragments and references for higher-order
allocations.  Linux 4.7 seems to have the same structure, though perhaps
there are different semantics there.

Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
kern/drivers/net/mlx4/en_rx.c
kern/drivers/net/mlx4/mlx4_en.h
kern/include/linux_compat.h
kern/include/refd_pages.h [new file with mode: 0644]

index c26c0b1..46ebe33 100644 (file)
@@ -42,7 +42,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
                            gfp_t _gfp)
 {
        int order;
-       struct page *page;
+       struct refd_pages *page;
        dma_addr_t dma;
 
        for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) {
@@ -50,17 +50,17 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
 
                if (order)
                        gfp |= __GFP_COMP | __GFP_NOWARN;
-               page = kva2page(get_cont_pages(order, gfp));
+               page = get_refd_pages(get_cont_pages(order, gfp), order);
                if (likely(page))
                        break;
                if (--order < 0 ||
                    ((PAGE_SIZE << order) < frag_info->frag_size))
                        return -ENOMEM;
        }
-       dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
+       dma = dma_map_page(priv->ddev, rp2page(page), 0, PAGE_SIZE << order,
                           PCI_DMA_FROMDEVICE);
        if (dma_mapping_error(priv->ddev, dma)) {
-               page_decref(page);
+               refd_pages_decref(page);
                return -ENOMEM;
        }
        page_alloc->page_size = PAGE_SIZE << order;
@@ -70,7 +70,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
        /* Not doing get_page() for each frag is a big win
         * on asymetric workloads. Note we can not use atomic_set().
         */
-       atomic_add(&page->pg_kref.refcount,
+       atomic_add(&page->rp_kref.refcount,
                   page_alloc->page_size / frag_info->frag_stride - 1);
        return 0;
 }
@@ -83,7 +83,7 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
 {
        struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
        const struct mlx4_en_frag_info *frag_info;
-       struct page *page;
+       struct refd_pages *page;
        dma_addr_t dma;
        int i;
 
@@ -115,8 +115,8 @@ out:
                        dma_unmap_page(priv->ddev, page_alloc[i].dma,
                                page_alloc[i].page_size, PCI_DMA_FROMDEVICE);
                        page = page_alloc[i].page;
-                       atomic_set(&page->pg_kref.refcount, 1);
-                       page_decref(page);
+                       atomic_set(&page->rp_kref.refcount, 1);
+                       refd_pages_decref(page);
                }
        }
        return -ENOMEM;
@@ -135,7 +135,7 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
                               PCI_DMA_FROMDEVICE);
 
        if (frags[i].page)
-               page_decref(frags[i].page);
+               refd_pages_decref(frags[i].page);
 }
 
 static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
@@ -153,20 +153,20 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
 
                en_dbg(DRV, priv, "  frag %d allocator: - size:%d frags:%d\n",
                       i, ring->page_alloc[i].page_size,
-                      atomic_read(&ring->page_alloc[i].page->pg_kref.refcount));
+                      atomic_read(&ring->page_alloc[i].page->rp_kref.refcount));
        }
        return 0;
 
 out:
        while (i--) {
-               struct page *page;
+               struct refd_pages *page;
 
                page_alloc = &ring->page_alloc[i];
                dma_unmap_page(priv->ddev, page_alloc->dma,
                               page_alloc->page_size, PCI_DMA_FROMDEVICE);
                page = page_alloc->page;
-               atomic_set(&page->pg_kref.refcount, 1);
-               page_decref(page);
+               atomic_set(&page->rp_kref.refcount, 1);
+               refd_pages_decref(page);
                page_alloc->page = NULL;
        }
        return -ENOMEM;
@@ -589,7 +589,7 @@ static void dump_packet(struct mlx4_en_priv *priv,
 {
        void *va;
 
-       va = page_address(frags[0].page) + frags[0].page_offset;
+       va = page_address(rp2page(frags[0].page)) + frags[0].page_offset;
 
        if (length <= SMALL_PACKET_SIZE) {
                hexdump(va, length);
@@ -616,7 +616,7 @@ static void recv_packet(struct mlx4_en_priv *priv,
                return;
        }
 
-       va = page_address(frags[0].page) + frags[0].page_offset;
+       va = page_address(rp2page(frags[0].page)) + frags[0].page_offset;
        memcpy(block->wp, va, length);
        block->wp += length;
 
index 6197268..faabb0b 100644 (file)
@@ -234,7 +234,7 @@ struct mlx4_en_tx_desc {
 #define MLX4_EN_CX3_HIGH_ID    0x1005
 
 struct mlx4_en_rx_alloc {
-       struct page     *page;
+       struct refd_pages       *page;
        dma_addr_t      dma;
        uint32_t                page_offset;
        uint32_t                page_size;
index f80beda..014ac3c 100644 (file)
@@ -25,6 +25,7 @@
 #include <taskqueue.h>
 #include <zlib.h>
 #include <list.h>
+#include <refd_pages.h>
 #include <linux/errno.h>
 /* temporary dumping ground */
 #include "compat_todo.h"
diff --git a/kern/include/refd_pages.h b/kern/include/refd_pages.h
new file mode 100644 (file)
index 0000000..b61f1a5
--- /dev/null
@@ -0,0 +1,57 @@
+/* Copyright (c) 2016 Google Inc.
+ * Barret Rhoden <brho@cs.berkeley.edu>
+ * See LICENSE for details.
+ *
+ * Helpers for reference counted pages, for use with Linux code.
+ *
+ * Some code wants to use reference counted pages.  I'd like to keep these
+ * uses separate from the main memory allocator.  Code that wants reference
+ * counted pages can use these helpers.
+ *
+ * Pass in memory allocated with get_cont_pages(). */
+
+#pragma once
+
+#include <kref.h>
+#include <page_alloc.h>
+#include <pmap.h>
+#include <kmalloc.h>
+#include <assert.h>
+
+struct refd_pages {
+       void                    *rp_kva;
+       size_t                  rp_order;
+       struct kref             rp_kref;
+};
+
+static struct page *rp2page(struct refd_pages *rp)
+{
+       return kva2page(rp->rp_kva);
+}
+
+static void refd_pages_release(struct kref *kref)
+{
+       struct refd_pages *rp = container_of(kref, struct refd_pages, rp_kref);
+
+       free_cont_pages(rp->rp_kva, rp->rp_order);
+       kfree(rp);
+}
+
+static struct refd_pages *get_refd_pages(void *kva, size_t order)
+{
+       struct refd_pages *rp;
+
+       if (!kva)
+               return 0;
+       rp = kmalloc(sizeof(struct refd_pages), MEM_WAIT);
+       assert(rp);
+       rp->rp_kva = kva;
+       rp->rp_order = order;
+       kref_init(&rp->rp_kref, refd_pages_release, 1);
+       return rp;
+}
+
+static void refd_pages_decref(struct refd_pages *rp)
+{
+       kref_put(&rp->rp_kref);
+}