ioat: import the IOAT driver from Linux
Author:     Barret Rhoden <brho@cs.berkeley.edu>
            Mon, 26 Aug 2019 18:19:02 +0000 (14:19 -0400)
Committer:  Barret Rhoden <brho@cs.berkeley.edu>
            Tue, 8 Oct 2019 21:11:10 +0000 (17:11 -0400)
From drivers/dma/ioat/, Linux 5.2.
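
For reference, a dmaengine client would drive this engine roughly as
sketched below (illustrative only, not part of this import; assumes
dst/src are already-mapped DMA addresses and that the client holds a
dmaengine_get() reference):

	struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);
	struct dma_async_tx_descriptor *tx;
	dma_cookie_t cookie;

	/* prep a memcpy on the channel's descriptor ring */
	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len,
				       DMA_PREP_INTERRUPT);
	cookie = dmaengine_submit(tx);	/* ioat_tx_submit_unlock() */
	dma_async_issue_pending(chan);	/* ioat_issue_pending() */
	dma_sync_wait(chan, cookie);	/* polls via ioat_tx_status() */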

Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
kern/drivers/dma/ioat/dma.c [new file with mode: 0644]
kern/drivers/dma/ioat/dma.h [new file with mode: 0644]
kern/drivers/dma/ioat/hw.h [new file with mode: 0644]
kern/drivers/dma/ioat/init.c [new file with mode: 0644]
kern/drivers/dma/ioat/prep.c [new file with mode: 0644]
kern/drivers/dma/ioat/registers.h [new file with mode: 0644]

diff --git a/kern/drivers/dma/ioat/dma.c b/kern/drivers/dma/ioat/dma.c
new file mode 100644 (file)
index 0000000..f373a13
--- /dev/null
@@ -0,0 +1,1048 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/prefetch.h>
+#include <linux/sizes.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+#include "../dmaengine.h"
+
+static char *chanerr_str[] = {
+       "DMA Transfer Source Address Error",
+       "DMA Transfer Destination Address Error",
+       "Next Descriptor Address Error",
+       "Descriptor Error",
+       "Chan Address Value Error",
+       "CHANCMD Error",
+       "Chipset Uncorrectable Data Integrity Error",
+       "DMA Uncorrectable Data Integrity Error",
+       "Read Data Error",
+       "Write Data Error",
+       "Descriptor Control Error",
+       "Descriptor Transfer Size Error",
+       "Completion Address Error",
+       "Interrupt Configuration Error",
+       "Super extended descriptor Address Error",
+       "Unaffiliated Error",
+       "CRC or XOR P Error",
+       "XOR Q Error",
+       "Descriptor Count Error",
+       "DIF All F detect Error",
+       "Guard Tag verification Error",
+       "Application Tag verification Error",
+       "Reference Tag verification Error",
+       "Bundle Bit Error",
+       "Result DIF All F detect Error",
+       "Result Guard Tag verification Error",
+       "Result Application Tag verification Error",
+       "Result Reference Tag verification Error",
+};
+
+static void ioat_eh(struct ioatdma_chan *ioat_chan);
+
+static void ioat_print_chanerrs(struct ioatdma_chan *ioat_chan, u32 chanerr)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(chanerr_str); i++) {
+               if ((chanerr >> i) & 1) {
+                       dev_err(to_dev(ioat_chan), "Err(%d): %s\n",
+                               i, chanerr_str[i]);
+               }
+       }
+}
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+       struct ioatdma_device *instance = data;
+       struct ioatdma_chan *ioat_chan;
+       unsigned long attnstatus;
+       int bit;
+       u8 intrctrl;
+
+       intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+       if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+               return IRQ_NONE;
+
+       if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+               writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+               return IRQ_NONE;
+       }
+
+       attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+       for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) {
+               ioat_chan = ioat_chan_by_index(instance, bit);
+               if (test_bit(IOAT_RUN, &ioat_chan->state))
+                       tasklet_schedule(&ioat_chan->cleanup_task);
+       }
+
+       writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+       return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+       struct ioatdma_chan *ioat_chan = data;
+
+       if (test_bit(IOAT_RUN, &ioat_chan->state))
+               tasklet_schedule(&ioat_chan->cleanup_task);
+
+       return IRQ_HANDLED;
+}
+
+void ioat_stop(struct ioatdma_chan *ioat_chan)
+{
+       struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+       struct pci_dev *pdev = ioat_dma->pdev;
+       int chan_id = chan_num(ioat_chan);
+       struct msix_entry *msix;
+
+       /* 1/ stop irq from firing tasklets
+        * 2/ stop the tasklet from re-arming irqs
+        */
+       clear_bit(IOAT_RUN, &ioat_chan->state);
+
+       /* flush inflight interrupts */
+       switch (ioat_dma->irq_mode) {
+       case IOAT_MSIX:
+               msix = &ioat_dma->msix_entries[chan_id];
+               synchronize_irq(msix->vector);
+               break;
+       case IOAT_MSI:
+       case IOAT_INTX:
+               synchronize_irq(pdev->irq);
+               break;
+       default:
+               break;
+       }
+
+       /* flush inflight timers */
+       del_timer_sync(&ioat_chan->timer);
+
+       /* flush inflight tasklet runs */
+       tasklet_kill(&ioat_chan->cleanup_task);
+
+       /* final cleanup now that everything is quiesced and can't re-arm */
+       ioat_cleanup_event((unsigned long)&ioat_chan->dma_chan);
+}
+
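+/* hand newly prepared descriptors to the hardware by bumping DMACOUNT */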
+static void __ioat_issue_pending(struct ioatdma_chan *ioat_chan)
+{
+       ioat_chan->dmacount += ioat_ring_pending(ioat_chan);
+       ioat_chan->issued = ioat_chan->head;
+       writew(ioat_chan->dmacount,
+              ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+       dev_dbg(to_dev(ioat_chan),
+               "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+               __func__, ioat_chan->head, ioat_chan->tail,
+               ioat_chan->issued, ioat_chan->dmacount);
+}
+
+void ioat_issue_pending(struct dma_chan *c)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+       if (ioat_ring_pending(ioat_chan)) {
+               spin_lock_bh(&ioat_chan->prep_lock);
+               __ioat_issue_pending(ioat_chan);
+               spin_unlock_bh(&ioat_chan->prep_lock);
+       }
+}
+
+/**
+ * ioat_update_pending - log pending descriptors
+ * @ioat: ioat2+ channel
+ *
+ * Check if the number of unsubmitted descriptors has exceeded the
+ * watermark.  Called with prep_lock held
+ */
+static void ioat_update_pending(struct ioatdma_chan *ioat_chan)
+{
+       if (ioat_ring_pending(ioat_chan) > ioat_pending_level)
+               __ioat_issue_pending(ioat_chan);
+}
+
+static void __ioat_start_null_desc(struct ioatdma_chan *ioat_chan)
+{
+       struct ioat_ring_ent *desc;
+       struct ioat_dma_descriptor *hw;
+
+       if (ioat_ring_space(ioat_chan) < 1) {
+               dev_err(to_dev(ioat_chan),
+                       "Unable to start null desc - ring full\n");
+               return;
+       }
+
+       dev_dbg(to_dev(ioat_chan),
+               "%s: head: %#x tail: %#x issued: %#x\n",
+               __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued);
+       desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
+
+       hw = desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = 1;
+       hw->ctl_f.compl_write = 1;
+       /* set size to non-zero value (channel returns error when size is 0) */
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       hw->src_addr = 0;
+       hw->dst_addr = 0;
+       async_tx_ack(&desc->txd);
+       ioat_set_chainaddr(ioat_chan, desc->txd.phys);
+       dump_desc_dbg(ioat_chan, desc);
+       /* make sure descriptors are written before we submit */
+       wmb();
+       ioat_chan->head += 1;
+       __ioat_issue_pending(ioat_chan);
+}
+
+void ioat_start_null_desc(struct ioatdma_chan *ioat_chan)
+{
+       spin_lock_bh(&ioat_chan->prep_lock);
+       if (!test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+               __ioat_start_null_desc(ioat_chan);
+       spin_unlock_bh(&ioat_chan->prep_lock);
+}
+
+static void __ioat_restart_chan(struct ioatdma_chan *ioat_chan)
+{
+       /* set the tail to be re-issued */
+       ioat_chan->issued = ioat_chan->tail;
+       ioat_chan->dmacount = 0;
+       mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+       dev_dbg(to_dev(ioat_chan),
+               "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+               __func__, ioat_chan->head, ioat_chan->tail,
+               ioat_chan->issued, ioat_chan->dmacount);
+
+       if (ioat_ring_pending(ioat_chan)) {
+               struct ioat_ring_ent *desc;
+
+               desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+               ioat_set_chainaddr(ioat_chan, desc->txd.phys);
+               __ioat_issue_pending(ioat_chan);
+       } else
+               __ioat_start_null_desc(ioat_chan);
+}
+
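+/* suspend the channel and spin until it quiesces or the timeout expires
+ * (tmo == 0 means wait forever) */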
+static int ioat_quiesce(struct ioatdma_chan *ioat_chan, unsigned long tmo)
+{
+       unsigned long end = jiffies + tmo;
+       int err = 0;
+       u32 status;
+
+       status = ioat_chansts(ioat_chan);
+       if (is_ioat_active(status) || is_ioat_idle(status))
+               ioat_suspend(ioat_chan);
+       while (is_ioat_active(status) || is_ioat_idle(status)) {
+               if (tmo && time_after(jiffies, end)) {
+                       err = -ETIMEDOUT;
+                       break;
+               }
+               status = ioat_chansts(ioat_chan);
+               cpu_relax();
+       }
+
+       return err;
+}
+
+static int ioat_reset_sync(struct ioatdma_chan *ioat_chan, unsigned long tmo)
+{
+       unsigned long end = jiffies + tmo;
+       int err = 0;
+
+       ioat_reset(ioat_chan);
+       while (ioat_reset_pending(ioat_chan)) {
+               if (end && time_after(jiffies, end)) {
+                       err = -ETIMEDOUT;
+                       break;
+               }
+               cpu_relax();
+       }
+
+       return err;
+}
+
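+/* tx_submit callback: entered with prep_lock held (taken in
+ * ioat_check_space_lock()); publishes the prepared descriptors to the
+ * ring and drops the lock */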
+static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
+       __releases(&ioat_chan->prep_lock)
+{
+       struct dma_chan *c = tx->chan;
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+       dma_cookie_t cookie;
+
+       cookie = dma_cookie_assign(tx);
+       dev_dbg(to_dev(ioat_chan), "%s: cookie: %d\n", __func__, cookie);
+
+       if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
+               mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+       /* Make descriptor updates visible before advancing ioat->head.
+        * This is purposefully not smp_wmb(), since we are also
+        * publishing the descriptor updates to a dma device
+        */
+       wmb();
+
+       ioat_chan->head += ioat_chan->produce;
+
+       ioat_update_pending(ioat_chan);
+       spin_unlock_bh(&ioat_chan->prep_lock);
+
+       return cookie;
+}
+
+static struct ioat_ring_ent *
+ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags)
+{
+       struct ioat_dma_descriptor *hw;
+       struct ioat_ring_ent *desc;
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+       int chunk;
+       dma_addr_t phys;
+       u8 *pos;
+       off_t offs;
+
+       chunk = idx / IOAT_DESCS_PER_2M;
+       idx &= (IOAT_DESCS_PER_2M - 1);
+       offs = idx * IOAT_DESC_SZ;
+       pos = (u8 *)ioat_chan->descs[chunk].virt + offs;
+       phys = ioat_chan->descs[chunk].hw + offs;
+       hw = (struct ioat_dma_descriptor *)pos;
+       memset(hw, 0, sizeof(*hw));
+
+       desc = kmem_cache_zalloc(ioat_cache, flags);
+       if (!desc)
+               return NULL;
+
+       dma_async_tx_descriptor_init(&desc->txd, chan);
+       desc->txd.tx_submit = ioat_tx_submit_unlock;
+       desc->hw = hw;
+       desc->txd.phys = phys;
+       return desc;
+}
+
+void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
+{
+       kmem_cache_free(ioat_cache, desc);
+}
+
+struct ioat_ring_ent **
+ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+       struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+       struct ioat_ring_ent **ring;
+       int total_descs = 1 << order;
+       int i, chunks;
+
+       /* allocate the array to hold the software ring */
+       ring = kcalloc(total_descs, sizeof(*ring), flags);
+       if (!ring)
+               return NULL;
+
+       ioat_chan->desc_chunks = chunks = (total_descs * IOAT_DESC_SZ) / SZ_2M;
+
+       for (i = 0; i < chunks; i++) {
+               struct ioat_descs *descs = &ioat_chan->descs[i];
+
+               descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
+                                                SZ_2M, &descs->hw, flags);
+               if (!descs->virt) {
+                       int idx;
+
+                       /* unwind: free only the chunks already allocated */
+                       for (idx = 0; idx < i; idx++) {
+                               descs = &ioat_chan->descs[idx];
+                               dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+                                                 descs->virt, descs->hw);
+                               descs->virt = NULL;
+                               descs->hw = 0;
+                       }
+
+                       ioat_chan->desc_chunks = 0;
+                       kfree(ring);
+                       return NULL;
+               }
+       }
+
+       for (i = 0; i < total_descs; i++) {
+               ring[i] = ioat_alloc_ring_ent(c, i, flags);
+               if (!ring[i]) {
+                       int idx;
+
+                       while (i--)
+                               ioat_free_ring_ent(ring[i], c);
+
+                       for (idx = 0; idx < ioat_chan->desc_chunks; idx++) {
+                               dma_free_coherent(to_dev(ioat_chan),
+                                                 SZ_2M,
+                                                 ioat_chan->descs[idx].virt,
+                                                 ioat_chan->descs[idx].hw);
+                               ioat_chan->descs[idx].virt = NULL;
+                               ioat_chan->descs[idx].hw = 0;
+                       }
+
+                       ioat_chan->desc_chunks = 0;
+                       kfree(ring);
+                       return NULL;
+               }
+               set_desc_id(ring[i], i);
+       }
+
+       /* link descs */
+       for (i = 0; i < total_descs-1; i++) {
+               struct ioat_ring_ent *next = ring[i+1];
+               struct ioat_dma_descriptor *hw = ring[i]->hw;
+
+               hw->next = next->txd.phys;
+       }
+       ring[i]->hw->next = ring[0]->txd.phys;
+
+       /* setup descriptor pre-fetching for v3.4 */
+       if (ioat_dma->cap & IOAT_CAP_DPS) {
+               u16 drsctl = IOAT_CHAN_DRSZ_2MB | IOAT_CHAN_DRS_EN;
+
+               if (chunks == 1)
+                       drsctl |= IOAT_CHAN_DRS_AUTOWRAP;
+
+               writew(drsctl, ioat_chan->reg_base + IOAT_CHAN_DRSCTL_OFFSET);
+
+       }
+
+       return ring;
+}
+
+/**
+ * ioat_check_space_lock - verify space and grab ring producer lock
+ * @ioat: ioat2,3 channel (ring) to operate on
+ * @num_descs: allocation length
+ */
+int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
+       __acquires(&ioat_chan->prep_lock)
+{
+       spin_lock_bh(&ioat_chan->prep_lock);
+       /* never allow the last descriptor to be consumed, we need at
+        * least one free at all times to allow for on-the-fly ring
+        * resizing.
+        */
+       if (likely(ioat_ring_space(ioat_chan) > num_descs)) {
+               dev_dbg(to_dev(ioat_chan), "%s: num_descs: %d (%x:%x:%x)\n",
+                       __func__, num_descs, ioat_chan->head,
+                       ioat_chan->tail, ioat_chan->issued);
+               ioat_chan->produce = num_descs;
+               return 0;  /* with ioat->prep_lock held */
+       }
+       spin_unlock_bh(&ioat_chan->prep_lock);
+
+       dev_dbg_ratelimited(to_dev(ioat_chan),
+                           "%s: ring full! num_descs: %d (%x:%x:%x)\n",
+                           __func__, num_descs, ioat_chan->head,
+                           ioat_chan->tail, ioat_chan->issued);
+
+       /* Progress reclaim in the allocation-failure case.  We may be
+        * called under bh_disabled, so we need to trigger the timer
+        * event directly.
+        */
+       if (time_is_before_jiffies(ioat_chan->timer.expires)
+           && timer_pending(&ioat_chan->timer)) {
+               mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+               ioat_timer_event(&ioat_chan->timer);
+       }
+
+       return -ENOMEM;
+}
+
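+/* xor ops with more than 5 sources and pq ops with more than 3 sources
+ * spill into a second (extended) descriptor, which occupies its own ring
+ * slot and is skipped during cleanup */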
+static bool desc_has_ext(struct ioat_ring_ent *desc)
+{
+       struct ioat_dma_descriptor *hw = desc->hw;
+
+       if (hw->ctl_f.op == IOAT_OP_XOR ||
+           hw->ctl_f.op == IOAT_OP_XOR_VAL) {
+               struct ioat_xor_descriptor *xor = desc->xor;
+
+               if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
+                       return true;
+       } else if (hw->ctl_f.op == IOAT_OP_PQ ||
+                  hw->ctl_f.op == IOAT_OP_PQ_VAL) {
+               struct ioat_pq_descriptor *pq = desc->pq;
+
+               if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
+                       return true;
+       }
+
+       return false;
+}
+
+static void
+ioat_free_sed(struct ioatdma_device *ioat_dma, struct ioat_sed_ent *sed)
+{
+       if (!sed)
+               return;
+
+       dma_pool_free(ioat_dma->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
+       kmem_cache_free(ioat_sed_cache, sed);
+}
+
+static u64 ioat_get_current_completion(struct ioatdma_chan *ioat_chan)
+{
+       u64 phys_complete;
+       u64 completion;
+
+       completion = *ioat_chan->completion;
+       phys_complete = ioat_chansts_to_addr(completion);
+
+       dev_dbg(to_dev(ioat_chan), "%s: phys_complete: %#llx\n", __func__,
+               (unsigned long long) phys_complete);
+
+       return phys_complete;
+}
+
+static bool ioat_cleanup_preamble(struct ioatdma_chan *ioat_chan,
+                                  u64 *phys_complete)
+{
+       *phys_complete = ioat_get_current_completion(ioat_chan);
+       if (*phys_complete == ioat_chan->last_completion)
+               return false;
+
+       clear_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
+       mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+       return true;
+}
+
+static void
+desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc)
+{
+       struct ioat_dma_descriptor *hw = desc->hw;
+
+       switch (hw->ctl_f.op) {
+       case IOAT_OP_PQ_VAL:
+       case IOAT_OP_PQ_VAL_16S:
+       {
+               struct ioat_pq_descriptor *pq = desc->pq;
+
+               /* check if there's error written */
+               if (!pq->dwbes_f.wbes)
+                       return;
+
+               /* need to set a chanerr var for checking to clear later */
+
+               if (pq->dwbes_f.p_val_err)
+                       *desc->result |= SUM_CHECK_P_RESULT;
+
+               if (pq->dwbes_f.q_val_err)
+                       *desc->result |= SUM_CHECK_Q_RESULT;
+
+               return;
+       }
+       default:
+               return;
+       }
+}
+
+/**
+ * __cleanup - reclaim used descriptors
+ * @ioat: channel (ring) to clean
+ */
+static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete)
+{
+       struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+       struct ioat_ring_ent *desc;
+       bool seen_current = false;
+       int idx = ioat_chan->tail, i;
+       u16 active;
+
+       dev_dbg(to_dev(ioat_chan), "%s: head: %#x tail: %#x issued: %#x\n",
+               __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued);
+
+       /*
+        * At restart of the channel, the completion address and the
+        * channel status will be 0 due to starting a new chain. Since
+        * it's a new chain and the first descriptor "fails", there is
+        * nothing to clean up. We do not want to reap the entire submitted
+        * chain due to this 0 address value and then BUG.
+        */
+       if (!phys_complete)
+               return;
+
+       active = ioat_ring_active(ioat_chan);
+       for (i = 0; i < active && !seen_current; i++) {
+               struct dma_async_tx_descriptor *tx;
+
+               prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1));
+               desc = ioat_get_ring_ent(ioat_chan, idx + i);
+               dump_desc_dbg(ioat_chan, desc);
+
+               /* set err stat if we are using dwbes */
+               if (ioat_dma->cap & IOAT_CAP_DWBES)
+                       desc_get_errstat(ioat_chan, desc);
+
+               tx = &desc->txd;
+               if (tx->cookie) {
+                       dma_cookie_complete(tx);
+                       dma_descriptor_unmap(tx);
+                       dmaengine_desc_get_callback_invoke(tx, NULL);
+                       tx->callback = NULL;
+                       tx->callback_result = NULL;
+               }
+
+               if (tx->phys == phys_complete)
+                       seen_current = true;
+
+               /* skip extended descriptors */
+               if (desc_has_ext(desc)) {
+                       BUG_ON(i + 1 >= active);
+                       i++;
+               }
+
+               /* cleanup super extended descriptors */
+               if (desc->sed) {
+                       ioat_free_sed(ioat_dma, desc->sed);
+                       desc->sed = NULL;
+               }
+       }
+
+       /* finish all descriptor reads before incrementing tail */
+       smp_mb();
+       ioat_chan->tail = idx + i;
+       /* no active descs have written a completion? */
+       BUG_ON(active && !seen_current);
+       ioat_chan->last_completion = phys_complete;
+
+       if (active - i == 0) {
+               dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n",
+                       __func__);
+               mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+       }
+
+       /* microsecond delay by sysfs variable  per pending descriptor */
+       if (ioat_chan->intr_coalesce != ioat_chan->prev_intr_coalesce) {
+               writew(min((ioat_chan->intr_coalesce * (active - i)),
+                      IOAT_INTRDELAY_MASK),
+                      ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET);
+               ioat_chan->prev_intr_coalesce = ioat_chan->intr_coalesce;
+       }
+}
+
+static void ioat_cleanup(struct ioatdma_chan *ioat_chan)
+{
+       u64 phys_complete;
+
+       spin_lock_bh(&ioat_chan->cleanup_lock);
+
+       if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+               __cleanup(ioat_chan, phys_complete);
+
+       if (is_ioat_halted(*ioat_chan->completion)) {
+               u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+               if (chanerr &
+                   (IOAT_CHANERR_HANDLE_MASK | IOAT_CHANERR_RECOVER_MASK)) {
+                       mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+                       ioat_eh(ioat_chan);
+               }
+       }
+
+       spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+void ioat_cleanup_event(unsigned long data)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan((void *)data);
+
+       ioat_cleanup(ioat_chan);
+       if (!test_bit(IOAT_RUN, &ioat_chan->state))
+               return;
+       writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void ioat_restart_channel(struct ioatdma_chan *ioat_chan)
+{
+       u64 phys_complete;
+
+       /* set the completion address register again */
+       writel(lower_32_bits(ioat_chan->completion_dma),
+              ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+       writel(upper_32_bits(ioat_chan->completion_dma),
+              ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+       ioat_quiesce(ioat_chan, 0);
+       if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+               __cleanup(ioat_chan, phys_complete);
+
+       __ioat_restart_chan(ioat_chan);
+}
+
+
+static void ioat_abort_descs(struct ioatdma_chan *ioat_chan)
+{
+       struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+       struct ioat_ring_ent *desc;
+       u16 active;
+       int idx = ioat_chan->tail, i;
+
+       /*
+        * We assume that the failed descriptor has been processed.
+        * Now we just complete the remaining submitted
+        * descriptors with an aborted status.
+        */
+       active = ioat_ring_active(ioat_chan);
+
+       /* we skip the failed descriptor that tail points to */
+       for (i = 1; i < active; i++) {
+               struct dma_async_tx_descriptor *tx;
+
+               prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1));
+               desc = ioat_get_ring_ent(ioat_chan, idx + i);
+
+               tx = &desc->txd;
+               if (tx->cookie) {
+                       struct dmaengine_result res;
+
+                       dma_cookie_complete(tx);
+                       dma_descriptor_unmap(tx);
+                       res.result = DMA_TRANS_ABORTED;
+                       dmaengine_desc_get_callback_invoke(tx, &res);
+                       tx->callback = NULL;
+                       tx->callback_result = NULL;
+               }
+
+               /* skip extended descriptors */
+               if (desc_has_ext(desc)) {
+                       WARN_ON(i + 1 >= active);
+                       i++;
+               }
+
+               /* cleanup super extended descriptors */
+               if (desc->sed) {
+                       ioat_free_sed(ioat_dma, desc->sed);
+                       desc->sed = NULL;
+               }
+       }
+
+       smp_mb(); /* finish all descriptor reads before incrementing tail */
+       ioat_chan->tail = idx + active;
+
+       desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+       ioat_chan->last_completion = *ioat_chan->completion = desc->txd.phys;
+}
+
+static void ioat_eh(struct ioatdma_chan *ioat_chan)
+{
+       struct pci_dev *pdev = to_pdev(ioat_chan);
+       struct ioat_dma_descriptor *hw;
+       struct dma_async_tx_descriptor *tx;
+       u64 phys_complete;
+       struct ioat_ring_ent *desc;
+       u32 err_handled = 0;
+       u32 chanerr_int;
+       u32 chanerr;
+       bool abort = false;
+       struct dmaengine_result res;
+
+       /* cleanup so tail points to descriptor that caused the error */
+       if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+               __cleanup(ioat_chan, phys_complete);
+
+       chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+       pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int);
+
+       dev_dbg(to_dev(ioat_chan), "%s: error = %x:%x\n",
+               __func__, chanerr, chanerr_int);
+
+       desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+       hw = desc->hw;
+       dump_desc_dbg(ioat_chan, desc);
+
+       switch (hw->ctl_f.op) {
+       case IOAT_OP_XOR_VAL:
+               if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
+                       *desc->result |= SUM_CHECK_P_RESULT;
+                       err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
+               }
+               break;
+       case IOAT_OP_PQ_VAL:
+       case IOAT_OP_PQ_VAL_16S:
+               if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
+                       *desc->result |= SUM_CHECK_P_RESULT;
+                       err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
+               }
+               if (chanerr & IOAT_CHANERR_XOR_Q_ERR) {
+                       *desc->result |= SUM_CHECK_Q_RESULT;
+                       err_handled |= IOAT_CHANERR_XOR_Q_ERR;
+               }
+               break;
+       }
+
+       if (chanerr & IOAT_CHANERR_RECOVER_MASK) {
+               if (chanerr & IOAT_CHANERR_READ_DATA_ERR) {
+                       res.result = DMA_TRANS_READ_FAILED;
+                       err_handled |= IOAT_CHANERR_READ_DATA_ERR;
+               } else if (chanerr & IOAT_CHANERR_WRITE_DATA_ERR) {
+                       res.result = DMA_TRANS_WRITE_FAILED;
+                       err_handled |= IOAT_CHANERR_WRITE_DATA_ERR;
+               }
+
+               abort = true;
+       } else
+               res.result = DMA_TRANS_NOERROR;
+
+       /* fault on unhandled error or spurious halt */
+       if (chanerr ^ err_handled || chanerr == 0) {
+               dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n",
+                       __func__, chanerr, err_handled);
+               dev_err(to_dev(ioat_chan), "Errors handled:\n");
+               ioat_print_chanerrs(ioat_chan, err_handled);
+               dev_err(to_dev(ioat_chan), "Errors not handled:\n");
+               ioat_print_chanerrs(ioat_chan, (chanerr & ~err_handled));
+
+               BUG();
+       }
+
+       /* cleanup the faulty descriptor since we are continuing */
+       tx = &desc->txd;
+       if (tx->cookie) {
+               dma_cookie_complete(tx);
+               dma_descriptor_unmap(tx);
+               dmaengine_desc_get_callback_invoke(tx, &res);
+               tx->callback = NULL;
+               tx->callback_result = NULL;
+       }
+
+       /* mark faulting descriptor as complete */
+       *ioat_chan->completion = desc->txd.phys;
+
+       spin_lock_bh(&ioat_chan->prep_lock);
+       /* we need to abort all descriptors */
+       if (abort) {
+               ioat_abort_descs(ioat_chan);
+               /* clean up the channel, we could be in weird state */
+               ioat_reset_hw(ioat_chan);
+       }
+
+       writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+       pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int);
+
+       ioat_restart_channel(ioat_chan);
+       spin_unlock_bh(&ioat_chan->prep_lock);
+}
+
+static void check_active(struct ioatdma_chan *ioat_chan)
+{
+       if (ioat_ring_active(ioat_chan)) {
+               mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+               return;
+       }
+
+       if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
+               mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+}
+
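+/* watchdog: reap completed descriptors, and reset/restart the channel if
+ * it halted or stopped making progress */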
+void ioat_timer_event(struct timer_list *t)
+{
+       struct ioatdma_chan *ioat_chan = from_timer(ioat_chan, t, timer);
+       dma_addr_t phys_complete;
+       u64 status;
+
+       status = ioat_chansts(ioat_chan);
+
+       /* when halted due to errors check for channel
+        * programming errors before advancing the completion state
+        */
+       if (is_ioat_halted(status)) {
+               u32 chanerr;
+
+               chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+               dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n",
+                       __func__, chanerr);
+               dev_err(to_dev(ioat_chan), "Errors:\n");
+               ioat_print_chanerrs(ioat_chan, chanerr);
+
+               if (test_bit(IOAT_RUN, &ioat_chan->state)) {
+                       spin_lock_bh(&ioat_chan->cleanup_lock);
+                       spin_lock_bh(&ioat_chan->prep_lock);
+                       set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+                       spin_unlock_bh(&ioat_chan->prep_lock);
+
+                       ioat_abort_descs(ioat_chan);
+                       dev_warn(to_dev(ioat_chan), "Reset channel...\n");
+                       ioat_reset_hw(ioat_chan);
+                       dev_warn(to_dev(ioat_chan), "Restart channel...\n");
+                       ioat_restart_channel(ioat_chan);
+
+                       spin_lock_bh(&ioat_chan->prep_lock);
+                       clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+                       spin_unlock_bh(&ioat_chan->prep_lock);
+                       spin_unlock_bh(&ioat_chan->cleanup_lock);
+               }
+
+               return;
+       }
+
+       spin_lock_bh(&ioat_chan->cleanup_lock);
+
+       /* handle the no-actives case */
+       if (!ioat_ring_active(ioat_chan)) {
+               spin_lock_bh(&ioat_chan->prep_lock);
+               check_active(ioat_chan);
+               spin_unlock_bh(&ioat_chan->prep_lock);
+               spin_unlock_bh(&ioat_chan->cleanup_lock);
+               return;
+       }
+
+       /* if we haven't made progress and we have already
+        * acknowledged a pending completion once, then be more
+        * forceful with a restart
+        */
+       if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+               __cleanup(ioat_chan, phys_complete);
+       else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) {
+               u32 chanerr;
+
+               chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+               dev_err(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
+                       status, chanerr);
+               dev_err(to_dev(ioat_chan), "Errors:\n");
+               ioat_print_chanerrs(ioat_chan, chanerr);
+
+               dev_dbg(to_dev(ioat_chan), "Active descriptors: %d\n",
+                       ioat_ring_active(ioat_chan));
+
+               spin_lock_bh(&ioat_chan->prep_lock);
+               set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+               spin_unlock_bh(&ioat_chan->prep_lock);
+
+               ioat_abort_descs(ioat_chan);
+               dev_warn(to_dev(ioat_chan), "Resetting channel...\n");
+               ioat_reset_hw(ioat_chan);
+               dev_warn(to_dev(ioat_chan), "Restarting channel...\n");
+               ioat_restart_channel(ioat_chan);
+
+               spin_lock_bh(&ioat_chan->prep_lock);
+               clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+               spin_unlock_bh(&ioat_chan->prep_lock);
+               spin_unlock_bh(&ioat_chan->cleanup_lock);
+               return;
+       } else
+               set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
+
+       mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+       spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+enum dma_status
+ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+               struct dma_tx_state *txstate)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+       enum dma_status ret;
+
+       ret = dma_cookie_status(c, cookie, txstate);
+       if (ret == DMA_COMPLETE)
+               return ret;
+
+       ioat_cleanup(ioat_chan);
+
+       return dma_cookie_status(c, cookie, txstate);
+}
+
+int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
+{
+       /* throw away whatever the channel was doing and get it
+        * initialized, with ioat3 specific workarounds
+        */
+       struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+       struct pci_dev *pdev = ioat_dma->pdev;
+       u32 chanerr;
+       u16 dev_id;
+       int err;
+
+       ioat_quiesce(ioat_chan, msecs_to_jiffies(100));
+
+       chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+       writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+       if (ioat_dma->version < IOAT_VER_3_3) {
+               /* clear any pending errors */
+               err = pci_read_config_dword(pdev,
+                               IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
+               if (err) {
+                       dev_err(&pdev->dev,
+                               "channel error register unreachable\n");
+                       return err;
+               }
+               pci_write_config_dword(pdev,
+                               IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
+
+               /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+                * (workaround for spurious config parity error after restart)
+                */
+               pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+               if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
+                       pci_write_config_dword(pdev,
+                                              IOAT_PCI_DMAUNCERRSTS_OFFSET,
+                                              0x10);
+               }
+       }
+
+       if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+               ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000);
+               ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008);
+               ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800);
+       }
+
+
+       err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200));
+       if (!err) {
+               if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+                       writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000);
+                       writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008);
+                       writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800);
+               }
+       }
+
+       if (err)
+               dev_err(&pdev->dev, "Failed to reset: %d\n", err);
+
+       return err;
+}
diff --git a/kern/drivers/dma/ioat/dma.h b/kern/drivers/dma/ioat/dma.h
new file mode 100644 (file)
index 0000000..aaafd0e
--- /dev/null
@@ -0,0 +1,420 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <linux/circ_buf.h>
+#include <linux/interrupt.h>
+#include "registers.h"
+#include "hw.h"
+
+#define IOAT_DMA_VERSION  "5.00"
+
+#define IOAT_DMA_DCA_ANY_CPU           ~0
+
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev)
+#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev)
+#define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev)
+
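+/* per-channel register banks are spaced 0x80 bytes apart in MMIO space */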
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80)
+
+/* ioat hardware assumes at least two sources for raid operations */
+#define src_cnt_to_sw(x) ((x) + 2)
+#define src_cnt_to_hw(x) ((x) - 2)
+#define ndest_to_sw(x) ((x) + 1)
+#define ndest_to_hw(x) ((x) - 1)
+#define src16_cnt_to_sw(x) ((x) + 9)
+#define src16_cnt_to_hw(x) ((x) - 9)
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+enum ioat_irq_mode {
+       IOAT_NOIRQ = 0,
+       IOAT_MSIX,
+       IOAT_MSI,
+       IOAT_INTX
+};
+
+/**
+ * struct ioatdma_device - internal representation of an IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @completion_pool: DMA buffers for completion ops
+ * @sed_hw_pool: DMA super descriptor pools
+ * @dma_dev: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @msix_entries: irq handlers
+ * @idx: per channel data
+ * @dca: direct cache access context
+ * @irq_mode: interrupt mode (INTX, MSI, MSIX)
+ * @cap: read DMA capabilities register
+ */
+struct ioatdma_device {
+       struct pci_dev *pdev;
+       void __iomem *reg_base;
+       struct dma_pool *completion_pool;
+#define MAX_SED_POOLS  5
+       struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
+       struct dma_device dma_dev;
+       u8 version;
+#define IOAT_MAX_CHANS 4
+       struct msix_entry msix_entries[IOAT_MAX_CHANS];
+       struct ioatdma_chan *idx[IOAT_MAX_CHANS];
+       struct dca_provider *dca;
+       enum ioat_irq_mode irq_mode;
+       u32 cap;
+
+       /* shadow version for CB3.3 chan reset errata workaround */
+       u64 msixtba0;
+       u64 msixdata0;
+       u32 msixpba;
+};
+
+struct ioat_descs {
+       void *virt;
+       dma_addr_t hw;
+};
+
+struct ioatdma_chan {
+       struct dma_chan dma_chan;
+       void __iomem *reg_base;
+       dma_addr_t last_completion;
+       spinlock_t cleanup_lock;
+       unsigned long state;
+       #define IOAT_CHAN_DOWN 0
+       #define IOAT_COMPLETION_ACK 1
+       #define IOAT_RESET_PENDING 2
+       #define IOAT_KOBJ_INIT_FAIL 3
+       #define IOAT_RUN 5
+       #define IOAT_CHAN_ACTIVE 6
+       struct timer_list timer;
+       #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
+       #define IDLE_TIMEOUT msecs_to_jiffies(2000)
+       #define RESET_DELAY msecs_to_jiffies(100)
+       struct ioatdma_device *ioat_dma;
+       dma_addr_t completion_dma;
+       u64 *completion;
+       struct tasklet_struct cleanup_task;
+       struct kobject kobj;
+
+/* ioat v2 / v3 channel attributes
+ * @xfercap_log: log2 of channel max transfer length (for fast division)
+ * @head: allocated index
+ * @issued: hardware notification point
+ * @tail: cleanup index
+ * @dmacount: identical to 'head' except for occasionally resetting to zero
+ * @alloc_order: log2 of the number of allocated descriptors
+ * @produce: number of descriptors to produce at submit time
+ * @ring: software ring buffer implementation of hardware ring
+ * @prep_lock: serializes descriptor preparation (producers)
+ */
+       size_t xfercap_log;
+       u16 head;
+       u16 issued;
+       u16 tail;
+       u16 dmacount;
+       u16 alloc_order;
+       u16 produce;
+       struct ioat_ring_ent **ring;
+       spinlock_t prep_lock;
+       struct ioat_descs descs[2];
+       int desc_chunks;
+       int intr_coalesce;
+       int prev_intr_coalesce;
+};
+
+struct ioat_sysfs_entry {
+       struct attribute attr;
+       ssize_t (*show)(struct dma_chan *, char *);
+       ssize_t (*store)(struct dma_chan *, const char *, size_t);
+};
+
+/**
+ * struct ioat_sed_ent - wrapper around super extended hardware descriptor
+ * @hw: hardware SED
+ * @dma: dma address for the SED
+ * @parent: pointer to the parent dma descriptor
+ * @hw_pool: descriptor pool index
+ */
+struct ioat_sed_ent {
+       struct ioat_sed_raw_descriptor *hw;
+       dma_addr_t dma;
+       struct ioat_ring_ent *parent;
+       unsigned int hw_pool;
+};
+
+/**
+ * struct ioat_ring_ent - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @xor: hardware xor descriptor
+ * @xor_ex: hardware xor extension descriptor
+ * @pq: hardware pq descriptor
+ * @pq_ex: hardware pq extension descriptor
+ * @pqu: hardware pq update descriptor
+ * @raw: hardware raw (un-typed) descriptor
+ * @txd: the generic software descriptor for all engines
+ * @len: total transaction length for unmap
+ * @result: asynchronous result of validate operations
+ * @id: identifier for debug
+ * @sed: pointer to super extended descriptor sw desc
+ */
+
+struct ioat_ring_ent {
+       union {
+               struct ioat_dma_descriptor *hw;
+               struct ioat_xor_descriptor *xor;
+               struct ioat_xor_ext_descriptor *xor_ex;
+               struct ioat_pq_descriptor *pq;
+               struct ioat_pq_ext_descriptor *pq_ex;
+               struct ioat_pq_update_descriptor *pqu;
+               struct ioat_raw_descriptor *raw;
+       };
+       size_t len;
+       struct dma_async_tx_descriptor txd;
+       enum sum_check_flags *result;
+       #ifdef DEBUG
+       int id;
+       #endif
+       struct ioat_sed_ent *sed;
+};
+
+extern const struct sysfs_ops ioat_sysfs_ops;
+extern struct ioat_sysfs_entry ioat_version_attr;
+extern struct ioat_sysfs_entry ioat_cap_attr;
+extern int ioat_pending_level;
+extern int ioat_ring_alloc_order;
+extern struct kobj_type ioat_ktype;
+extern struct kmem_cache *ioat_cache;
+extern int ioat_ring_max_alloc_order;
+extern struct kmem_cache *ioat_sed_cache;
+
+static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c)
+{
+       return container_of(c, struct ioatdma_chan, dma_chan);
+}
+
+/* wrapper around hardware descriptor format + additional software fields */
+#ifdef DEBUG
+#define set_desc_id(desc, i) ((desc)->id = (i))
+#define desc_id(desc) ((desc)->id)
+#else
+#define set_desc_id(desc, i)
+#define desc_id(desc) (0)
+#endif
+
+static inline void
+__dump_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_dma_descriptor *hw,
+               struct dma_async_tx_descriptor *tx, int id)
+{
+       struct device *dev = to_dev(ioat_chan);
+
+       dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
+               " ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id,
+               (unsigned long long) tx->phys,
+               (unsigned long long) hw->next, tx->cookie, tx->flags,
+               hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
+}
+
+#define dump_desc_dbg(c, d) \
+       ({ if (d) __dump_desc_dbg(c, d->hw, &d->txd, desc_id(d)); 0; })
+
+static inline struct ioatdma_chan *
+ioat_chan_by_index(struct ioatdma_device *ioat_dma, int index)
+{
+       return ioat_dma->idx[index];
+}
+
+static inline u64 ioat_chansts(struct ioatdma_chan *ioat_chan)
+{
+       return readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET);
+}
+
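+/* CHANSTS packs status bits into the low bits of the (64-byte aligned)
+ * completed-descriptor address; mask them off to recover the address */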
+static inline u64 ioat_chansts_to_addr(u64 status)
+{
+       return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+}
+
+static inline u32 ioat_chanerr(struct ioatdma_chan *ioat_chan)
+{
+       return readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+}
+
+static inline void ioat_suspend(struct ioatdma_chan *ioat_chan)
+{
+       u8 ver = ioat_chan->ioat_dma->version;
+
+       writeb(IOAT_CHANCMD_SUSPEND,
+              ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline void ioat_reset(struct ioatdma_chan *ioat_chan)
+{
+       u8 ver = ioat_chan->ioat_dma->version;
+
+       writeb(IOAT_CHANCMD_RESET,
+              ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline bool ioat_reset_pending(struct ioatdma_chan *ioat_chan)
+{
+       u8 ver = ioat_chan->ioat_dma->version;
+       u8 cmd;
+
+       cmd = readb(ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+       return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
+}
+
+static inline bool is_ioat_active(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+static inline bool is_ioat_idle(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
+}
+
+static inline bool is_ioat_halted(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
+}
+
+static inline bool is_ioat_suspended(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
+}
+
+/* channel was fatally programmed */
+static inline bool is_ioat_bug(unsigned long err)
+{
+       return !!err;
+}
+
+#define IOAT_MAX_ORDER 16
+#define IOAT_MAX_DESCS 65536
+#define IOAT_DESCS_PER_2M 32768        /* SZ_2M / IOAT_DESC_SZ (64 B) */
+
+static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan)
+{
+       return 1 << ioat_chan->alloc_order;
+}
+
+/* count of descriptors in flight with the engine */
+static inline u16 ioat_ring_active(struct ioatdma_chan *ioat_chan)
+{
+       return CIRC_CNT(ioat_chan->head, ioat_chan->tail,
+                       ioat_ring_size(ioat_chan));
+}
+
+/* count of descriptors pending submission to hardware */
+static inline u16 ioat_ring_pending(struct ioatdma_chan *ioat_chan)
+{
+       return CIRC_CNT(ioat_chan->head, ioat_chan->issued,
+                       ioat_ring_size(ioat_chan));
+}
+
+static inline u32 ioat_ring_space(struct ioatdma_chan *ioat_chan)
+{
+       return ioat_ring_size(ioat_chan) - ioat_ring_active(ioat_chan);
+}
+
+static inline u16
+ioat_xferlen_to_descs(struct ioatdma_chan *ioat_chan, size_t len)
+{
+       u16 num_descs = len >> ioat_chan->xfercap_log;
+
+       num_descs += !!(len & ((1 << ioat_chan->xfercap_log) - 1));
+       return num_descs;
+}
+
+static inline struct ioat_ring_ent *
+ioat_get_ring_ent(struct ioatdma_chan *ioat_chan, u16 idx)
+{
+       return ioat_chan->ring[idx & (ioat_ring_size(ioat_chan) - 1)];
+}
+
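+/* point the engine at the first descriptor of a (re)started chain; it
+ * follows hw->next links from there */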
+static inline void
+ioat_set_chainaddr(struct ioatdma_chan *ioat_chan, u64 addr)
+{
+       writel(addr & 0x00000000FFFFFFFF,
+              ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+       writel(addr >> 32,
+              ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+}
+
+/* IOAT Prep functions */
+struct dma_async_tx_descriptor *
+ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+                          dma_addr_t dma_src, size_t len, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+              unsigned int src_cnt, size_t len, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+                   unsigned int src_cnt, size_t len,
+                   enum sum_check_flags *result, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+             unsigned int src_cnt, const unsigned char *scf, size_t len,
+             unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+                 unsigned int src_cnt, const unsigned char *scf, size_t len,
+                 enum sum_check_flags *pqres, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+                unsigned int src_cnt, size_t len, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+                    unsigned int src_cnt, size_t len,
+                    enum sum_check_flags *result, unsigned long flags);
+
+/* IOAT Operation functions */
+irqreturn_t ioat_dma_do_interrupt(int irq, void *data);
+irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data);
+struct ioat_ring_ent **
+ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags);
+void ioat_start_null_desc(struct ioatdma_chan *ioat_chan);
+void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan);
+int ioat_reset_hw(struct ioatdma_chan *ioat_chan);
+enum dma_status
+ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+               struct dma_tx_state *txstate);
+void ioat_cleanup_event(unsigned long data);
+void ioat_timer_event(struct timer_list *t);
+int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs);
+void ioat_issue_pending(struct dma_chan *chan);
+
+/* IOAT Init functions */
+bool is_bwd_ioat(struct pci_dev *pdev);
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type);
+void ioat_kobject_del(struct ioatdma_device *ioat_dma);
+int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma);
+void ioat_stop(struct ioatdma_chan *ioat_chan);
+#endif /* IOATDMA_H */
diff --git a/kern/drivers/dma/ioat/hw.h b/kern/drivers/dma/ioat/hw.h
new file mode 100644 (file)
index 0000000..781c94d
--- /dev/null
@@ -0,0 +1,283 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_MMIO_BAR          0
+
+/* CB device ID's */
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB0  0x0e20
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB1  0x0e21
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB2  0x0e22
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB3  0x0e23
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB4  0x0e24
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB5  0x0e25
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB6  0x0e26
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB7  0x0e27
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB8  0x0e2e
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB9  0x0e2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW0  0x2f20
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW1  0x2f21
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW2  0x2f22
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW3  0x2f23
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW4  0x2f24
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW5  0x2f25
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW6  0x2f26
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW7  0x2f27
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW8  0x2f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW9  0x2f2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD0  0x0C50
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD1  0x0C51
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD2  0x0C52
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD3  0x0C53
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE0        0x6f50
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE1        0x6f51
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE2        0x6f52
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE3        0x6f53
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX0  0x6f20
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX1  0x6f21
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX2  0x6f22
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX3  0x6f23
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX4  0x6f24
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX5  0x6f25
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX6  0x6f26
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX7  0x6f27
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX8  0x6f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX9  0x6f2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_SKX   0x2021
+
+#define PCI_DEVICE_ID_INTEL_IOAT_ICX   0x0b00
+
+#define IOAT_VER_1_2            0x12    /* Version 1.2 */
+#define IOAT_VER_2_0            0x20    /* Version 2.0 */
+#define IOAT_VER_3_0            0x30    /* Version 3.0 */
+#define IOAT_VER_3_2            0x32    /* Version 3.2 */
+#define IOAT_VER_3_3            0x33    /* Version 3.3 */
+#define IOAT_VER_3_4           0x34    /* Version 3.4 */
+
+
+int system_has_dca_enabled(struct pci_dev *pdev);
+
+#define IOAT_DESC_SZ   64
+
+struct ioat_dma_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int null:1;
+                       unsigned int src_brk:1;
+                       unsigned int dest_brk:1;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int rsvd2:13;
+                       #define IOAT_OP_COPY 0x00
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        dst_addr;
+       uint64_t        next;
+       uint64_t        rsv1;
+       uint64_t        rsv2;
+       /* store some driver data in an unused portion of the descriptor */
+       union {
+               uint64_t        user1;
+               uint64_t        tx_cnt;
+       };
+       uint64_t        user2;
+};
+
+struct ioat_xor_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int src_cnt:3;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int rsvd:13;
+                       #define IOAT_OP_XOR 0x87
+                       #define IOAT_OP_XOR_VAL 0x88
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        dst_addr;
+       uint64_t        next;
+       uint64_t        src_addr2;
+       uint64_t        src_addr3;
+       uint64_t        src_addr4;
+       uint64_t        src_addr5;
+};
+
+struct ioat_xor_ext_descriptor {
+       uint64_t        src_addr6;
+       uint64_t        src_addr7;
+       uint64_t        src_addr8;
+       uint64_t        next;
+       uint64_t        rsvd[4];
+};
+
+struct ioat_pq_descriptor {
+       union {
+               uint32_t        size;
+               uint32_t        dwbes;
+               struct {
+                       unsigned int rsvd:25;
+                       unsigned int p_val_err:1;
+                       unsigned int q_val_err:1;
+                       unsigned int rsvd1:4;
+                       unsigned int wbes:1;
+               } dwbes_f;
+       };
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int src_cnt:3;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int p_disable:1;
+                       unsigned int q_disable:1;
+                       unsigned int rsvd2:2;
+                       unsigned int wb_en:1;
+                       unsigned int prl_en:1;
+                       unsigned int rsvd3:7;
+                       #define IOAT_OP_PQ 0x89
+                       #define IOAT_OP_PQ_VAL 0x8a
+                       #define IOAT_OP_PQ_16S 0xa0
+                       #define IOAT_OP_PQ_VAL_16S 0xa1
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        p_addr;
+       uint64_t        next;
+       uint64_t        src_addr2;
+       union {
+               uint64_t        src_addr3;
+               uint64_t        sed_addr;
+       };
+       uint8_t         coef[8];
+       uint64_t        q_addr;
+};
+
+struct ioat_pq_ext_descriptor {
+       uint64_t        src_addr4;
+       uint64_t        src_addr5;
+       uint64_t        src_addr6;
+       uint64_t        next;
+       uint64_t        src_addr7;
+       uint64_t        src_addr8;
+       uint64_t        rsvd[2];
+};
+
+struct ioat_pq_update_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int src_cnt:3;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int p_disable:1;
+                       unsigned int q_disable:1;
+                       unsigned int rsvd:3;
+                       unsigned int coef:8;
+                       #define IOAT_OP_PQ_UP 0x8b
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        p_addr;
+       uint64_t        next;
+       uint64_t        src_addr2;
+       uint64_t        p_src;
+       uint64_t        q_src;
+       uint64_t        q_addr;
+};
+
+struct ioat_raw_descriptor {
+       uint64_t        field[8];
+};
+
+struct ioat_pq16a_descriptor {
+       uint8_t coef[8];
+       uint64_t src_addr3;
+       uint64_t src_addr4;
+       uint64_t src_addr5;
+       uint64_t src_addr6;
+       uint64_t src_addr7;
+       uint64_t src_addr8;
+       uint64_t src_addr9;
+};
+
+struct ioat_pq16b_descriptor {
+       uint64_t src_addr10;
+       uint64_t src_addr11;
+       uint64_t src_addr12;
+       uint64_t src_addr13;
+       uint64_t src_addr14;
+       uint64_t src_addr15;
+       uint64_t src_addr16;
+       uint64_t rsvd;
+};
+
+union ioat_sed_pq_descriptor {
+       struct ioat_pq16a_descriptor a;
+       struct ioat_pq16b_descriptor b;
+};
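+
+/*
+ * Editorial note (sketch): a 16-source PQ operation (IOAT_OP_PQ_16S) only
+ * fits its first few sources in the base ioat_pq_descriptor; the remaining
+ * source addresses and coefficients spill into one of these super-extended
+ * descriptors (SEDs), which are allocated from dedicated DMA pools in
+ * SED_SIZE increments.
+ */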
+
+#define SED_SIZE       64
+
+struct ioat_sed_raw_descriptor {
+       uint64_t        a[8];
+       uint64_t        b[8];
+       uint64_t        c[8];
+};
+
+#endif
diff --git a/kern/drivers/dma/ioat/init.c b/kern/drivers/dma/ioat/init.c
new file mode 100644 (file)
index 0000000..d41dc9a
--- /dev/null
@@ -0,0 +1,1463 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/prefetch.h>
+#include <linux/dca.h>
+#include <linux/aer.h>
+#include <linux/sizes.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+#include "../dmaengine.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static const struct pci_device_id ioat_pci_tbl[] = {
+       /* I/OAT v3 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+
+       /* I/OAT v3.2 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) },
+
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) },
+
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) },
+
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX3) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX4) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX5) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX6) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX7) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) },
+
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SKX) },
+
+       /* I/OAT v3.3 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) },
+
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) },
+
+       /* I/OAT v3.4 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_ICX) },
+
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
+
+static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id);
+static void ioat_remove(struct pci_dev *pdev);
+static void
+ioat_init_channel(struct ioatdma_device *ioat_dma,
+                 struct ioatdma_chan *ioat_chan, int idx);
+static void ioat_intr_quirk(struct ioatdma_device *ioat_dma);
+static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma);
+static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
+int ioat_pending_level = 7;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+                "high-water mark for pushing ioat descriptors (default: 7)");
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+                   sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+                "set ioat interrupt style: msix (default), msi, intx");
+
+struct kmem_cache *ioat_cache;
+struct kmem_cache *ioat_sed_cache;
+
+static bool is_jf_ioat(struct pci_dev *pdev)
+{
+       switch (pdev->device) {
+       case PCI_DEVICE_ID_INTEL_IOAT_JSF0:
+       case PCI_DEVICE_ID_INTEL_IOAT_JSF1:
+       case PCI_DEVICE_ID_INTEL_IOAT_JSF2:
+       case PCI_DEVICE_ID_INTEL_IOAT_JSF3:
+       case PCI_DEVICE_ID_INTEL_IOAT_JSF4:
+       case PCI_DEVICE_ID_INTEL_IOAT_JSF5:
+       case PCI_DEVICE_ID_INTEL_IOAT_JSF6:
+       case PCI_DEVICE_ID_INTEL_IOAT_JSF7:
+       case PCI_DEVICE_ID_INTEL_IOAT_JSF8:
+       case PCI_DEVICE_ID_INTEL_IOAT_JSF9:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool is_snb_ioat(struct pci_dev *pdev)
+{
+       switch (pdev->device) {
+       case PCI_DEVICE_ID_INTEL_IOAT_SNB0:
+       case PCI_DEVICE_ID_INTEL_IOAT_SNB1:
+       case PCI_DEVICE_ID_INTEL_IOAT_SNB2:
+       case PCI_DEVICE_ID_INTEL_IOAT_SNB3:
+       case PCI_DEVICE_ID_INTEL_IOAT_SNB4:
+       case PCI_DEVICE_ID_INTEL_IOAT_SNB5:
+       case PCI_DEVICE_ID_INTEL_IOAT_SNB6:
+       case PCI_DEVICE_ID_INTEL_IOAT_SNB7:
+       case PCI_DEVICE_ID_INTEL_IOAT_SNB8:
+       case PCI_DEVICE_ID_INTEL_IOAT_SNB9:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool is_ivb_ioat(struct pci_dev *pdev)
+{
+       switch (pdev->device) {
+       case PCI_DEVICE_ID_INTEL_IOAT_IVB0:
+       case PCI_DEVICE_ID_INTEL_IOAT_IVB1:
+       case PCI_DEVICE_ID_INTEL_IOAT_IVB2:
+       case PCI_DEVICE_ID_INTEL_IOAT_IVB3:
+       case PCI_DEVICE_ID_INTEL_IOAT_IVB4:
+       case PCI_DEVICE_ID_INTEL_IOAT_IVB5:
+       case PCI_DEVICE_ID_INTEL_IOAT_IVB6:
+       case PCI_DEVICE_ID_INTEL_IOAT_IVB7:
+       case PCI_DEVICE_ID_INTEL_IOAT_IVB8:
+       case PCI_DEVICE_ID_INTEL_IOAT_IVB9:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool is_hsw_ioat(struct pci_dev *pdev)
+{
+       switch (pdev->device) {
+       case PCI_DEVICE_ID_INTEL_IOAT_HSW0:
+       case PCI_DEVICE_ID_INTEL_IOAT_HSW1:
+       case PCI_DEVICE_ID_INTEL_IOAT_HSW2:
+       case PCI_DEVICE_ID_INTEL_IOAT_HSW3:
+       case PCI_DEVICE_ID_INTEL_IOAT_HSW4:
+       case PCI_DEVICE_ID_INTEL_IOAT_HSW5:
+       case PCI_DEVICE_ID_INTEL_IOAT_HSW6:
+       case PCI_DEVICE_ID_INTEL_IOAT_HSW7:
+       case PCI_DEVICE_ID_INTEL_IOAT_HSW8:
+       case PCI_DEVICE_ID_INTEL_IOAT_HSW9:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool is_bdx_ioat(struct pci_dev *pdev)
+{
+       switch (pdev->device) {
+       case PCI_DEVICE_ID_INTEL_IOAT_BDX0:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDX1:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDX2:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDX3:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDX4:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDX5:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDX6:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDX7:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDX8:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDX9:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static inline bool is_skx_ioat(struct pci_dev *pdev)
+{
+       return pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SKX;
+}
+
+static bool is_xeon_cb32(struct pci_dev *pdev)
+{
+       return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) ||
+               is_hsw_ioat(pdev) || is_bdx_ioat(pdev) || is_skx_ioat(pdev);
+}
+
+bool is_bwd_ioat(struct pci_dev *pdev)
+{
+       switch (pdev->device) {
+       case PCI_DEVICE_ID_INTEL_IOAT_BWD0:
+       case PCI_DEVICE_ID_INTEL_IOAT_BWD1:
+       case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
+       case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
+       /* even though not Atom, BDX-DE has the same DMA silicon */
+       case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool is_bwd_noraid(struct pci_dev *pdev)
+{
+       switch (pdev->device) {
+       case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
+       case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2:
+       case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Perform an IOAT transaction to verify that the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void ioat_dma_test_callback(void *dma_async_param)
+{
+       struct completion *cmp = dma_async_param;
+
+       complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
+ * @ioat_dma: dma device to be tested
+ */
+static int ioat_dma_self_test(struct ioatdma_device *ioat_dma)
+{
+       int i;
+       u8 *src;
+       u8 *dest;
+       struct dma_device *dma = &ioat_dma->dma_dev;
+       struct device *dev = &ioat_dma->pdev->dev;
+       struct dma_chan *dma_chan;
+       struct dma_async_tx_descriptor *tx;
+       dma_addr_t dma_dest, dma_src;
+       dma_cookie_t cookie;
+       int err = 0;
+       struct completion cmp;
+       unsigned long tmo;
+       unsigned long flags;
+
+       src = kzalloc(IOAT_TEST_SIZE, GFP_KERNEL);
+       if (!src)
+               return -ENOMEM;
+       dest = kzalloc(IOAT_TEST_SIZE, GFP_KERNEL);
+       if (!dest) {
+               kfree(src);
+               return -ENOMEM;
+       }
+
+       /* Fill in src buffer */
+       for (i = 0; i < IOAT_TEST_SIZE; i++)
+               src[i] = (u8)i;
+
+       /* Start copy, using first DMA channel */
+       dma_chan = container_of(dma->channels.next, struct dma_chan,
+                               device_node);
+       if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+               dev_err(dev, "selftest cannot allocate chan resource\n");
+               err = -ENODEV;
+               goto out;
+       }
+
+       dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+       if (dma_mapping_error(dev, dma_src)) {
+               dev_err(dev, "mapping src buffer failed\n");
+               err = -ENOMEM;
+               goto free_resources;
+       }
+       dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+       if (dma_mapping_error(dev, dma_dest)) {
+               dev_err(dev, "mapping dest buffer failed\n");
+               err = -ENOMEM;
+               goto unmap_src;
+       }
+       flags = DMA_PREP_INTERRUPT;
+       tx = ioat_dma->dma_dev.device_prep_dma_memcpy(dma_chan, dma_dest,
+                                                     dma_src, IOAT_TEST_SIZE,
+                                                     flags);
+       if (!tx) {
+               dev_err(dev, "Self-test prep failed, disabling\n");
+               err = -ENODEV;
+               goto unmap_dma;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test setup failed, disabling\n");
+               err = -ENODEV;
+               goto unmap_dma;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (tmo == 0 ||
+           dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+               dev_err(dev, "Self-test copy timed out, disabling\n");
+               err = -ENODEV;
+               goto unmap_dma;
+       }
+       if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+               dev_err(dev, "Self-test copy failed compare, disabling\n");
+               err = -ENODEV;
+               goto unmap_dma;
+       }
+
+unmap_dma:
+       dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+unmap_src:
+       dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+free_resources:
+       dma->device_free_chan_resources(dma_chan);
+out:
+       kfree(src);
+       kfree(dest);
+       return err;
+}
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @ioat_dma: ioat dma device
+ */
+int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma)
+{
+       struct ioatdma_chan *ioat_chan;
+       struct pci_dev *pdev = ioat_dma->pdev;
+       struct device *dev = &pdev->dev;
+       struct msix_entry *msix;
+       int i, j, msixcnt;
+       int err = -EINVAL;
+       u8 intrctrl = 0;
+
+       if (!strcmp(ioat_interrupt_style, "msix"))
+               goto msix;
+       if (!strcmp(ioat_interrupt_style, "msi"))
+               goto msi;
+       if (!strcmp(ioat_interrupt_style, "intx"))
+               goto intx;
+       dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
+       goto err_no_irq;
+
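+       /*
+        * Editorial note: the labels below form a fallback chain.  A failed
+        * MSI-X setup falls back to MSI, a failed MSI setup falls back to
+        * legacy INTx, and only err_no_irq leaves interrupts disabled.
+        */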
+msix:
+       /* The number of MSI-X vectors should equal the number of channels */
+       msixcnt = ioat_dma->dma_dev.chancnt;
+       for (i = 0; i < msixcnt; i++)
+               ioat_dma->msix_entries[i].entry = i;
+
+       err = pci_enable_msix_exact(pdev, ioat_dma->msix_entries, msixcnt);
+       if (err)
+               goto msi;
+
+       for (i = 0; i < msixcnt; i++) {
+               msix = &ioat_dma->msix_entries[i];
+               ioat_chan = ioat_chan_by_index(ioat_dma, i);
+               err = devm_request_irq(dev, msix->vector,
+                                      ioat_dma_do_interrupt_msix, 0,
+                                      "ioat-msix", ioat_chan);
+               if (err) {
+                       for (j = 0; j < i; j++) {
+                               msix = &ioat_dma->msix_entries[j];
+                               ioat_chan = ioat_chan_by_index(ioat_dma, j);
+                               devm_free_irq(dev, msix->vector, ioat_chan);
+                       }
+                       goto msi;
+               }
+       }
+       intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+       ioat_dma->irq_mode = IOAT_MSIX;
+       goto done;
+
+msi:
+       err = pci_enable_msi(pdev);
+       if (err)
+               goto intx;
+
+       err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
+                              "ioat-msi", ioat_dma);
+       if (err) {
+               pci_disable_msi(pdev);
+               goto intx;
+       }
+       ioat_dma->irq_mode = IOAT_MSI;
+       goto done;
+
+intx:
+       err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
+                              IRQF_SHARED, "ioat-intx", ioat_dma);
+       if (err)
+               goto err_no_irq;
+
+       ioat_dma->irq_mode = IOAT_INTX;
+done:
+       if (is_bwd_ioat(pdev))
+               ioat_intr_quirk(ioat_dma);
+       intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+       writeb(intrctrl, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET);
+       return 0;
+
+err_no_irq:
+       /* Disable all interrupt generation */
+       writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET);
+       ioat_dma->irq_mode = IOAT_NOIRQ;
+       dev_err(dev, "no usable interrupts\n");
+       return err;
+}
+
+static void ioat_disable_interrupts(struct ioatdma_device *ioat_dma)
+{
+       /* Disable all interrupt generation */
+       writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET);
+}
+
+static int ioat_probe(struct ioatdma_device *ioat_dma)
+{
+       int err = -ENODEV;
+       struct dma_device *dma = &ioat_dma->dma_dev;
+       struct pci_dev *pdev = ioat_dma->pdev;
+       struct device *dev = &pdev->dev;
+
+       ioat_dma->completion_pool = dma_pool_create("completion_pool", dev,
+                                                   sizeof(u64),
+                                                   SMP_CACHE_BYTES,
+                                                   SMP_CACHE_BYTES);
+
+       if (!ioat_dma->completion_pool) {
+               err = -ENOMEM;
+               goto err_out;
+       }
+
+       ioat_enumerate_channels(ioat_dma);
+
+       dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+       dma->dev = &pdev->dev;
+
+       if (!dma->chancnt) {
+               dev_err(dev, "channel enumeration error\n");
+               goto err_setup_interrupts;
+       }
+
+       err = ioat_dma_setup_interrupts(ioat_dma);
+       if (err)
+               goto err_setup_interrupts;
+
+       err = ioat3_dma_self_test(ioat_dma);
+       if (err)
+               goto err_self_test;
+
+       return 0;
+
+err_self_test:
+       ioat_disable_interrupts(ioat_dma);
+err_setup_interrupts:
+       dma_pool_destroy(ioat_dma->completion_pool);
+err_out:
+       return err;
+}
+
+static int ioat_register(struct ioatdma_device *ioat_dma)
+{
+       int err = dma_async_device_register(&ioat_dma->dma_dev);
+
+       if (err) {
+               ioat_disable_interrupts(ioat_dma);
+               dma_pool_destroy(ioat_dma->completion_pool);
+       }
+
+       return err;
+}
+
+static void ioat_dma_remove(struct ioatdma_device *ioat_dma)
+{
+       struct dma_device *dma = &ioat_dma->dma_dev;
+
+       ioat_disable_interrupts(ioat_dma);
+
+       ioat_kobject_del(ioat_dma);
+
+       dma_async_device_unregister(dma);
+
+       dma_pool_destroy(ioat_dma->completion_pool);
+
+       INIT_LIST_HEAD(&dma->channels);
+}
+
+/**
+ * ioat_enumerate_channels - find and initialize the device's channels
+ * @ioat_dma: the ioat dma device to be enumerated
+ */
+static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma)
+{
+       struct ioatdma_chan *ioat_chan;
+       struct device *dev = &ioat_dma->pdev->dev;
+       struct dma_device *dma = &ioat_dma->dma_dev;
+       u8 xfercap_log;
+       int i;
+
+       INIT_LIST_HEAD(&dma->channels);
+       dma->chancnt = readb(ioat_dma->reg_base + IOAT_CHANCNT_OFFSET);
+       dma->chancnt &= 0x1f; /* bits [4:0] valid */
+       if (dma->chancnt > ARRAY_SIZE(ioat_dma->idx)) {
+               dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+                        dma->chancnt, ARRAY_SIZE(ioat_dma->idx));
+               dma->chancnt = ARRAY_SIZE(ioat_dma->idx);
+       }
+       xfercap_log = readb(ioat_dma->reg_base + IOAT_XFERCAP_OFFSET);
+       xfercap_log &= 0x1f; /* bits [4:0] valid */
+       if (xfercap_log == 0)
+               return;
+       dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
+
+       for (i = 0; i < dma->chancnt; i++) {
+               ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL);
+               if (!ioat_chan)
+                       break;
+
+               ioat_init_channel(ioat_dma, ioat_chan, i);
+               ioat_chan->xfercap_log = xfercap_log;
+               spin_lock_init(&ioat_chan->prep_lock);
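+               /* editorial note: a failed HW reset zeroes chancnt via the
+                * 'i = 0' below, which ioat_probe() then reports as a
+                * channel enumeration error */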
+               if (ioat_reset_hw(ioat_chan)) {
+                       i = 0;
+                       break;
+               }
+       }
+       dma->chancnt = i;
+}
+
+/**
+ * ioat_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+static void ioat_free_chan_resources(struct dma_chan *c)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+       struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+       struct ioat_ring_ent *desc;
+       const int total_descs = 1 << ioat_chan->alloc_order;
+       int descs;
+       int i;
+
+       /* Before freeing channel resources, first check whether they have
+        * previously been allocated for this channel.
+        */
+       if (!ioat_chan->ring)
+               return;
+
+       ioat_stop(ioat_chan);
+       ioat_reset_hw(ioat_chan);
+
+       /* Put LTR to idle */
+       if (ioat_dma->version >= IOAT_VER_3_4)
+               writeb(IOAT_CHAN_LTR_SWSEL_IDLE,
+                       ioat_chan->reg_base + IOAT_CHAN_LTR_SWSEL_OFFSET);
+
+       spin_lock_bh(&ioat_chan->cleanup_lock);
+       spin_lock_bh(&ioat_chan->prep_lock);
+       descs = ioat_ring_space(ioat_chan);
+       dev_dbg(to_dev(ioat_chan), "freeing %d idle descriptors\n", descs);
+       for (i = 0; i < descs; i++) {
+               desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head + i);
+               ioat_free_ring_ent(desc, c);
+       }
+
+       if (descs < total_descs)
+               dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n",
+                       total_descs - descs);
+
+       for (i = 0; i < total_descs - descs; i++) {
+               desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail + i);
+               dump_desc_dbg(ioat_chan, desc);
+               ioat_free_ring_ent(desc, c);
+       }
+
+       for (i = 0; i < ioat_chan->desc_chunks; i++) {
+               dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+                                 ioat_chan->descs[i].virt,
+                                 ioat_chan->descs[i].hw);
+               ioat_chan->descs[i].virt = NULL;
+               ioat_chan->descs[i].hw = 0;
+       }
+       ioat_chan->desc_chunks = 0;
+
+       kfree(ioat_chan->ring);
+       ioat_chan->ring = NULL;
+       ioat_chan->alloc_order = 0;
+       dma_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
+                     ioat_chan->completion_dma);
+       spin_unlock_bh(&ioat_chan->prep_lock);
+       spin_unlock_bh(&ioat_chan->cleanup_lock);
+
+       ioat_chan->last_completion = 0;
+       ioat_chan->completion_dma = 0;
+       ioat_chan->dmacount = 0;
+}
+
+/**
+ * ioat_alloc_chan_resources - allocate/initialize ioat descriptor ring
+ * @chan: channel to be initialized
+ */
+static int ioat_alloc_chan_resources(struct dma_chan *c)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+       struct ioat_ring_ent **ring;
+       u64 status;
+       int order;
+       int i = 0;
+       u32 chanerr;
+
+       /* have we already been set up? */
+       if (ioat_chan->ring)
+               return 1 << ioat_chan->alloc_order;
+
+       /* Set up the register to interrupt and write completion status on error */
+       writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+       /* allocate a completion writeback area */
+       /* do two 32-bit MMIO writes, since a single 64-bit write doesn't work */
+       ioat_chan->completion =
+               dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool,
+                               GFP_NOWAIT, &ioat_chan->completion_dma);
+       if (!ioat_chan->completion)
+               return -ENOMEM;
+
+       writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF,
+              ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+       writel(((u64)ioat_chan->completion_dma) >> 32,
+              ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+       order = IOAT_MAX_ORDER;
+       ring = ioat_alloc_ring(c, order, GFP_NOWAIT);
+       if (!ring)
+               return -ENOMEM;
+
+       spin_lock_bh(&ioat_chan->cleanup_lock);
+       spin_lock_bh(&ioat_chan->prep_lock);
+       ioat_chan->ring = ring;
+       ioat_chan->head = 0;
+       ioat_chan->issued = 0;
+       ioat_chan->tail = 0;
+       ioat_chan->alloc_order = order;
+       set_bit(IOAT_RUN, &ioat_chan->state);
+       spin_unlock_bh(&ioat_chan->prep_lock);
+       spin_unlock_bh(&ioat_chan->cleanup_lock);
+
+       /* Setting up LTR values for 3.4 or later */
+       if (ioat_chan->ioat_dma->version >= IOAT_VER_3_4) {
+               u32 lat_val;
+
+               lat_val = IOAT_CHAN_LTR_ACTIVE_SNVAL |
+                       IOAT_CHAN_LTR_ACTIVE_SNLATSCALE |
+                       IOAT_CHAN_LTR_ACTIVE_SNREQMNT;
+               writel(lat_val, ioat_chan->reg_base +
+                               IOAT_CHAN_LTR_ACTIVE_OFFSET);
+
+               lat_val = IOAT_CHAN_LTR_IDLE_SNVAL |
+                         IOAT_CHAN_LTR_IDLE_SNLATSCALE |
+                         IOAT_CHAN_LTR_IDLE_SNREQMNT;
+               writel(lat_val, ioat_chan->reg_base +
+                               IOAT_CHAN_LTR_IDLE_OFFSET);
+
+               /* Select to active */
+               writeb(IOAT_CHAN_LTR_SWSEL_ACTIVE,
+                      ioat_chan->reg_base +
+                      IOAT_CHAN_LTR_SWSEL_OFFSET);
+       }
+
+       ioat_start_null_desc(ioat_chan);
+
+       /* check that we got off the ground */
+       do {
+               udelay(1);
+               status = ioat_chansts(ioat_chan);
+       } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status));
+
+       if (is_ioat_active(status) || is_ioat_idle(status))
+               return 1 << ioat_chan->alloc_order;
+
+       chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+       dev_WARN(to_dev(ioat_chan),
+                "failed to start channel chanerr: %#x\n", chanerr);
+       ioat_free_chan_resources(c);
+       return -EFAULT;
+}
+
+/* common channel initialization */
+static void
+ioat_init_channel(struct ioatdma_device *ioat_dma,
+                 struct ioatdma_chan *ioat_chan, int idx)
+{
+       struct dma_device *dma = &ioat_dma->dma_dev;
+       struct dma_chan *c = &ioat_chan->dma_chan;
+       unsigned long data = (unsigned long) c;
+
+       ioat_chan->ioat_dma = ioat_dma;
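+       /* editorial note: per-channel registers live in consecutive
+        * 0x80-byte banks following the device-common bank at the start
+        * of the MMIO BAR */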
+       ioat_chan->reg_base = ioat_dma->reg_base + (0x80 * (idx + 1));
+       spin_lock_init(&ioat_chan->cleanup_lock);
+       ioat_chan->dma_chan.device = dma;
+       dma_cookie_init(&ioat_chan->dma_chan);
+       list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels);
+       ioat_dma->idx[idx] = ioat_chan;
+       timer_setup(&ioat_chan->timer, ioat_timer_event, 0);
+       tasklet_init(&ioat_chan->cleanup_task, ioat_cleanup_event, data);
+}
+
+#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
+static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma)
+{
+       int i, src_idx;
+       struct page *dest;
+       struct page *xor_srcs[IOAT_NUM_SRC_TEST];
+       struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
+       dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
+       dma_addr_t dest_dma;
+       struct dma_async_tx_descriptor *tx;
+       struct dma_chan *dma_chan;
+       dma_cookie_t cookie;
+       u8 cmp_byte = 0;
+       u32 cmp_word;
+       u32 xor_val_result;
+       int err = 0;
+       struct completion cmp;
+       unsigned long tmo;
+       struct device *dev = &ioat_dma->pdev->dev;
+       struct dma_device *dma = &ioat_dma->dma_dev;
+       u8 op = 0;
+
+       dev_dbg(dev, "%s\n", __func__);
+
+       if (!dma_has_cap(DMA_XOR, dma->cap_mask))
+               return 0;
+
+       for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+               xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+               if (!xor_srcs[src_idx]) {
+                       while (src_idx--)
+                               __free_page(xor_srcs[src_idx]);
+                       return -ENOMEM;
+               }
+       }
+
+       dest = alloc_page(GFP_KERNEL);
+       if (!dest) {
+               while (src_idx--)
+                       __free_page(xor_srcs[src_idx]);
+               return -ENOMEM;
+       }
+
+       /* Fill in src buffers */
+       for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+               u8 *ptr = page_address(xor_srcs[src_idx]);
+
+               for (i = 0; i < PAGE_SIZE; i++)
+                       ptr[i] = (1 << src_idx);
+       }
+
+       for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+               cmp_byte ^= (u8) (1 << src_idx);
+
+       cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+                       (cmp_byte << 8) | cmp_byte;
+
+       memset(page_address(dest), 0, PAGE_SIZE);
+
+       dma_chan = container_of(dma->channels.next, struct dma_chan,
+                               device_node);
+       if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+               err = -ENODEV;
+               goto out;
+       }
+
+       /* test xor */
+       op = IOAT_OP_XOR;
+
+       dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+       if (dma_mapping_error(dev, dest_dma)) {
+               err = -ENOMEM;
+               goto free_resources;
+       }
+
+       for (i = 0; i < IOAT_NUM_SRC_TEST; i++) {
+               dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+                                          DMA_TO_DEVICE);
+               if (dma_mapping_error(dev, dma_srcs[i])) {
+                       err = -ENOMEM;
+                       goto dma_unmap;
+               }
+       }
+       tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+                                     IOAT_NUM_SRC_TEST, PAGE_SIZE,
+                                     DMA_PREP_INTERRUPT);
+
+       if (!tx) {
+               dev_err(dev, "Self-test xor prep failed\n");
+               err = -ENODEV;
+               goto dma_unmap;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test xor setup failed\n");
+               err = -ENODEV;
+               goto dma_unmap;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (tmo == 0 ||
+           dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+               dev_err(dev, "Self-test xor timed out\n");
+               err = -ENODEV;
+               goto dma_unmap;
+       }
+
+       for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+               dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+       dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+       for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+               u32 *ptr = page_address(dest);
+
+               if (ptr[i] != cmp_word) {
+                       dev_err(dev, "Self-test xor failed compare\n");
+                       err = -ENODEV;
+                       goto free_resources;
+               }
+       }
+       dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+       dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+       /* skip validate if the capability is not present */
+       if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+               goto free_resources;
+
+       op = IOAT_OP_XOR_VAL;
+
+       /* validate the sources with the destination page */
+       for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+               xor_val_srcs[i] = xor_srcs[i];
+       xor_val_srcs[i] = dest;
+
+       xor_val_result = 1;
+
+       for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
+               dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+                                          DMA_TO_DEVICE);
+               if (dma_mapping_error(dev, dma_srcs[i])) {
+                       err = -ENOMEM;
+                       goto dma_unmap;
+               }
+       }
+       tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+                                         IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+                                         &xor_val_result, DMA_PREP_INTERRUPT);
+       if (!tx) {
+               dev_err(dev, "Self-test zero prep failed\n");
+               err = -ENODEV;
+               goto dma_unmap;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test zero setup failed\n");
+               err = -ENODEV;
+               goto dma_unmap;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (tmo == 0 ||
+           dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+               dev_err(dev, "Self-test validate timed out\n");
+               err = -ENODEV;
+               goto dma_unmap;
+       }
+
+       for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+               dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+       if (xor_val_result != 0) {
+               dev_err(dev, "Self-test validate failed compare\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       memset(page_address(dest), 0, PAGE_SIZE);
+
+       /* test for non-zero parity sum */
+       op = IOAT_OP_XOR_VAL;
+
+       xor_val_result = 0;
+       for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
+               dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+                                          DMA_TO_DEVICE);
+               if (dma_mapping_error(dev, dma_srcs[i])) {
+                       err = -ENOMEM;
+                       goto dma_unmap;
+               }
+       }
+       tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+                                         IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+                                         &xor_val_result, DMA_PREP_INTERRUPT);
+       if (!tx) {
+               dev_err(dev, "Self-test 2nd zero prep failed\n");
+               err = -ENODEV;
+               goto dma_unmap;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test 2nd zero setup failed\n");
+               err = -ENODEV;
+               goto dma_unmap;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (tmo == 0 ||
+           dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+               dev_err(dev, "Self-test 2nd validate timed out\n");
+               err = -ENODEV;
+               goto dma_unmap;
+       }
+
+       if (xor_val_result != SUM_CHECK_P_RESULT) {
+               dev_err(dev, "Self-test validate failed compare\n");
+               err = -ENODEV;
+               goto dma_unmap;
+       }
+
+       for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+               dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+       goto free_resources;
+dma_unmap:
+       if (op == IOAT_OP_XOR) {
+               while (--i >= 0)
+                       dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+                                      DMA_TO_DEVICE);
+               dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+       } else if (op == IOAT_OP_XOR_VAL) {
+               while (--i >= 0)
+                       dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+                                      DMA_TO_DEVICE);
+       }
+free_resources:
+       dma->device_free_chan_resources(dma_chan);
+out:
+       src_idx = IOAT_NUM_SRC_TEST;
+       while (src_idx--)
+               __free_page(xor_srcs[src_idx]);
+       __free_page(dest);
+       return err;
+}
+
+static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma)
+{
+       int rc;
+
+       rc = ioat_dma_self_test(ioat_dma);
+       if (rc)
+               return rc;
+
+       rc = ioat_xor_val_self_test(ioat_dma);
+
+       return rc;
+}
+
+static void ioat_intr_quirk(struct ioatdma_device *ioat_dma)
+{
+       struct dma_device *dma;
+       struct dma_chan *c;
+       struct ioatdma_chan *ioat_chan;
+       u32 errmask;
+
+       dma = &ioat_dma->dma_dev;
+
+       /*
+        * If the device supports descriptor write-back error status, mask
+        * the per-channel XOR P/CRC and Q error interrupts; those errors
+        * are reported through the descriptor write-back instead.
+        */
+       if (ioat_dma->cap & IOAT_CAP_DWBES) {
+               list_for_each_entry(c, &dma->channels, device_node) {
+                       ioat_chan = to_ioat_chan(c);
+                       errmask = readl(ioat_chan->reg_base +
+                                       IOAT_CHANERR_MASK_OFFSET);
+                       errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR |
+                                  IOAT_CHANERR_XOR_Q_ERR;
+                       writel(errmask, ioat_chan->reg_base +
+                                       IOAT_CHANERR_MASK_OFFSET);
+               }
+       }
+}
+
+static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca)
+{
+       struct pci_dev *pdev = ioat_dma->pdev;
+       int dca_en = system_has_dca_enabled(pdev);
+       struct dma_device *dma;
+       struct dma_chan *c;
+       struct ioatdma_chan *ioat_chan;
+       int err;
+       u16 val16;
+
+       dma = &ioat_dma->dma_dev;
+       dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock;
+       dma->device_issue_pending = ioat_issue_pending;
+       dma->device_alloc_chan_resources = ioat_alloc_chan_resources;
+       dma->device_free_chan_resources = ioat_free_chan_resources;
+
+       dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+       dma->device_prep_dma_interrupt = ioat_prep_interrupt_lock;
+
+       ioat_dma->cap = readl(ioat_dma->reg_base + IOAT_DMA_CAP_OFFSET);
+
+       if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev))
+               ioat_dma->cap &=
+                       ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS);
+
+       /* dca is incompatible with raid operations */
+       if (dca_en && (ioat_dma->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
+               ioat_dma->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
+
+       if (ioat_dma->cap & IOAT_CAP_XOR) {
+               dma->max_xor = 8;
+
+               dma_cap_set(DMA_XOR, dma->cap_mask);
+               dma->device_prep_dma_xor = ioat_prep_xor;
+
+               dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+               dma->device_prep_dma_xor_val = ioat_prep_xor_val;
+       }
+
+       if (ioat_dma->cap & IOAT_CAP_PQ) {
+               dma->device_prep_dma_pq = ioat_prep_pq;
+               dma->device_prep_dma_pq_val = ioat_prep_pq_val;
+               dma_cap_set(DMA_PQ, dma->cap_mask);
+               dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+
+               if (ioat_dma->cap & IOAT_CAP_RAID16SS)
+                       dma_set_maxpq(dma, 16, 0);
+               else
+                       dma_set_maxpq(dma, 8, 0);
+
+               if (!(ioat_dma->cap & IOAT_CAP_XOR)) {
+                       dma->device_prep_dma_xor = ioat_prep_pqxor;
+                       dma->device_prep_dma_xor_val = ioat_prep_pqxor_val;
+                       dma_cap_set(DMA_XOR, dma->cap_mask);
+                       dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+
+                       if (ioat_dma->cap & IOAT_CAP_RAID16SS)
+                               dma->max_xor = 16;
+                       else
+                               dma->max_xor = 8;
+               }
+       }
+
+       dma->device_tx_status = ioat_tx_status;
+
+       /* starting with CB3.3, super extended descriptors are supported */
+       if (ioat_dma->cap & IOAT_CAP_RAID16SS) {
+               char pool_name[14];
+               int i;
+
+               for (i = 0; i < MAX_SED_POOLS; i++) {
+                       snprintf(pool_name, 14, "ioat_hw%d_sed", i);
+
+                       /* allocate SED DMA pool */
+                       ioat_dma->sed_hw_pool[i] = dmam_pool_create(pool_name,
+                                       &pdev->dev,
+                                       SED_SIZE * (i + 1), 64, 0);
+                       if (!ioat_dma->sed_hw_pool[i])
+                               return -ENOMEM;
+               }
+       }
+
+       if (!(ioat_dma->cap & (IOAT_CAP_XOR | IOAT_CAP_PQ)))
+               dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+       err = ioat_probe(ioat_dma);
+       if (err)
+               return err;
+
+       list_for_each_entry(c, &dma->channels, device_node) {
+               ioat_chan = to_ioat_chan(c);
+               writel(IOAT_DMA_DCA_ANY_CPU,
+                      ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
+       }
+
+       err = ioat_register(ioat_dma);
+       if (err)
+               return err;
+
+       ioat_kobject_add(ioat_dma, &ioat_ktype);
+
+       if (dca)
+               ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base);
+
+       /* disable relaxed ordering */
+       err = pcie_capability_read_word(pdev, IOAT_DEVCTRL_OFFSET, &val16);
+       if (err)
+               return err;
+
+       /* clear relaxed ordering enable */
+       val16 &= ~IOAT_DEVCTRL_ROE;
+       err = pcie_capability_write_word(pdev, IOAT_DEVCTRL_OFFSET, val16);
+       if (err)
+               return err;
+
+       if (ioat_dma->cap & IOAT_CAP_DPS)
+               writeb(ioat_pending_level + 1,
+                      ioat_dma->reg_base + IOAT_PREFETCH_LIMIT_OFFSET);
+
+       return 0;
+}
+
+static void ioat_shutdown(struct pci_dev *pdev)
+{
+       struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev);
+       struct ioatdma_chan *ioat_chan;
+       int i;
+
+       if (!ioat_dma)
+               return;
+
+       for (i = 0; i < IOAT_MAX_CHANS; i++) {
+               ioat_chan = ioat_dma->idx[i];
+               if (!ioat_chan)
+                       continue;
+
+               spin_lock_bh(&ioat_chan->prep_lock);
+               set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+               spin_unlock_bh(&ioat_chan->prep_lock);
+               /*
+                * Synchronization rule for del_timer_sync():
+                *  - The caller must not hold locks which would prevent
+                *    completion of the timer's handler.
+                * So prep_lock cannot be held before calling it.
+                */
+               del_timer_sync(&ioat_chan->timer);
+
+               /* this should quiesce then reset */
+               ioat_reset_hw(ioat_chan);
+       }
+
+       ioat_disable_interrupts(ioat_dma);
+}
+
+static void ioat_resume(struct ioatdma_device *ioat_dma)
+{
+       struct ioatdma_chan *ioat_chan;
+       u32 chanerr;
+       int i;
+
+       for (i = 0; i < IOAT_MAX_CHANS; i++) {
+               ioat_chan = ioat_dma->idx[i];
+               if (!ioat_chan)
+                       continue;
+
+               spin_lock_bh(&ioat_chan->prep_lock);
+               clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+               spin_unlock_bh(&ioat_chan->prep_lock);
+
+               chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+               writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+               /* no need to reset as shutdown already did that */
+       }
+}
+
+#define DRV_NAME "ioatdma"
+
+static pci_ers_result_t ioat_pcie_error_detected(struct pci_dev *pdev,
+                                                enum pci_channel_state error)
+{
+       dev_dbg(&pdev->dev, "%s: PCIe AER error %d\n", DRV_NAME, error);
+
+       /* quiesce and block I/O */
+       ioat_shutdown(pdev);
+
+       return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t ioat_pcie_error_slot_reset(struct pci_dev *pdev)
+{
+       pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
+
+       dev_dbg(&pdev->dev, "%s post reset handling\n", DRV_NAME);
+
+       if (pci_enable_device_mem(pdev) < 0) {
+               dev_err(&pdev->dev,
+                       "Failed to enable PCIe device after reset.\n");
+               result = PCI_ERS_RESULT_DISCONNECT;
+       } else {
+               pci_set_master(pdev);
+               pci_restore_state(pdev);
+               pci_save_state(pdev);
+               pci_wake_from_d3(pdev, false);
+       }
+
+       return result;
+}
+
+static void ioat_pcie_error_resume(struct pci_dev *pdev)
+{
+       struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev);
+
+       dev_dbg(&pdev->dev, "%s: AER handling resuming\n", DRV_NAME);
+
+       /* initialize and bring everything back */
+       ioat_resume(ioat_dma);
+}
+
+static const struct pci_error_handlers ioat_err_handler = {
+       .error_detected = ioat_pcie_error_detected,
+       .slot_reset = ioat_pcie_error_slot_reset,
+       .resume = ioat_pcie_error_resume,
+};
+
+static struct pci_driver ioat_pci_driver = {
+       .name           = DRV_NAME,
+       .id_table       = ioat_pci_tbl,
+       .probe          = ioat_pci_probe,
+       .remove         = ioat_remove,
+       .shutdown       = ioat_shutdown,
+       .err_handler    = &ioat_err_handler,
+};
+
+static struct ioatdma_device *
+alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
+{
+       struct device *dev = &pdev->dev;
+       struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+
+       if (!d)
+               return NULL;
+       d->pdev = pdev;
+       d->reg_base = iobase;
+       return d;
+}
+
+static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       void __iomem * const *iomap;
+       struct device *dev = &pdev->dev;
+       struct ioatdma_device *device;
+       int err;
+
+       err = pcim_enable_device(pdev);
+       if (err)
+               return err;
+
+       err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
+       if (err)
+               return err;
+       iomap = pcim_iomap_table(pdev);
+       if (!iomap)
+               return -ENOMEM;
+
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (err)
+               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+       if (err)
+               return err;
+
+       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (err)
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+       if (err)
+               return err;
+
+       device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
+       if (!device)
+               return -ENOMEM;
+       pci_set_master(pdev);
+       pci_set_drvdata(pdev, device);
+
+       device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+       if (device->version >= IOAT_VER_3_4)
+               ioat_dca_enabled = 0;
+       if (device->version >= IOAT_VER_3_0) {
+               if (is_skx_ioat(pdev))
+                       device->version = IOAT_VER_3_2;
+               err = ioat3_dma_probe(device, ioat_dca_enabled);
+
+               if (device->version >= IOAT_VER_3_3)
+                       pci_enable_pcie_error_reporting(pdev);
+       } else
+               return -ENODEV;
+
+       if (err) {
+               dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
+               pci_disable_pcie_error_reporting(pdev);
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static void ioat_remove(struct pci_dev *pdev)
+{
+       struct ioatdma_device *device = pci_get_drvdata(pdev);
+
+       if (!device)
+               return;
+
+       dev_err(&pdev->dev, "Removing dma and dca services\n");
+       if (device->dca) {
+               unregister_dca_provider(device->dca, &pdev->dev);
+               free_dca_provider(device->dca);
+               device->dca = NULL;
+       }
+
+       pci_disable_pcie_error_reporting(pdev);
+       ioat_dma_remove(device);
+}
+
+static int __init ioat_init_module(void)
+{
+       int err = -ENOMEM;
+
+       pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
+               DRV_NAME, IOAT_DMA_VERSION);
+
+       ioat_cache = kmem_cache_create("ioat", sizeof(struct ioat_ring_ent),
+                                       0, SLAB_HWCACHE_ALIGN, NULL);
+       if (!ioat_cache)
+               return -ENOMEM;
+
+       ioat_sed_cache = KMEM_CACHE(ioat_sed_ent, 0);
+       if (!ioat_sed_cache)
+               goto err_ioat_cache;
+
+       err = pci_register_driver(&ioat_pci_driver);
+       if (err)
+               goto err_ioat3_cache;
+
+       return 0;
+
+ err_ioat3_cache:
+       kmem_cache_destroy(ioat_sed_cache);
+
+ err_ioat_cache:
+       kmem_cache_destroy(ioat_cache);
+
+       return err;
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+       pci_unregister_driver(&ioat_pci_driver);
+       kmem_cache_destroy(ioat_sed_cache);
+       kmem_cache_destroy(ioat_cache);
+}
+module_exit(ioat_exit_module);
diff --git a/kern/drivers/dma/ioat/prep.c b/kern/drivers/dma/ioat/prep.c
new file mode 100644 (file)
index 0000000..243421a
--- /dev/null
@@ -0,0 +1,749 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
+#include "../dmaengine.h"
+#include "registers.h"
+#include "hw.h"
+#include "dma.h"
+
+#define MAX_SCF        256
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor or pq descriptor
+ */
+static const u8 xor_idx_to_desc = 0xe0;
+static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc = 0xf8;
+static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
+                                      2, 2, 2, 2, 2, 2, 2 };
+static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
+static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
+                                       0, 1, 2, 3, 4, 5, 6 };
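+
+/*
+ * Worked example of the scheme above: xor_idx_to_desc is 0xe0
+ * (binary 11100000), so (xor_idx_to_desc >> idx) & 1 selects the base
+ * descriptor for source indexes 0-4 and the extended descriptor for
+ * indexes 5-7; xor_idx_to_field[5] == 0 then places source 5 in field 0
+ * of the extended descriptor.  pq_idx_to_desc (0xf8) splits the same way
+ * at index 3, and the pq16 tables index a third, super extended
+ * descriptor.
+ */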
+
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+                       dma_addr_t addr, u32 offset, int idx)
+{
+       struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+       raw->field[xor_idx_to_field[idx]] = addr + offset;
+}
+
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+       struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+       return raw->field[pq_idx_to_field[idx]];
+}
+
+static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
+{
+       struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
+
+       return raw->field[pq16_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+                      dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+       struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+       struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+       raw->field[pq_idx_to_field[idx]] = addr + offset;
+       pq->coef[idx] = coef;
+}
+
+static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
+                       dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
+{
+       struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
+       struct ioat_pq16a_descriptor *pq16 =
+               (struct ioat_pq16a_descriptor *)desc[1];
+       struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
+
+       raw->field[pq16_idx_to_field[idx]] = addr + offset;
+
+       if (idx < 8)
+               pq->coef[idx] = coef;
+       else
+               pq16->coef[idx - 8] = coef;
+}
+
+static struct ioat_sed_ent *
+ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
+{
+       struct ioat_sed_ent *sed;
+       gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
+
+       sed = kmem_cache_alloc(ioat_sed_cache, flags);
+       if (!sed)
+               return NULL;
+
+       sed->hw_pool = hw_pool;
+       sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
+                                flags, &sed->dma);
+       if (!sed->hw) {
+               kmem_cache_free(ioat_sed_cache, sed);
+               return NULL;
+       }
+
+       return sed;
+}
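+
+/*
+ * The sed ("super extended descriptor") holds the extra source address
+ * fields that a 16-source pq operation needs beyond what the ring
+ * descriptor pair can carry.  It comes from a separate DMA pool and is
+ * linked to its parent descriptor via pq->sed_addr in
+ * __ioat_prep_pq16_lock() below.
+ */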
+
+struct dma_async_tx_descriptor *
+ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+                          dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+       struct ioat_dma_descriptor *hw;
+       struct ioat_ring_ent *desc;
+       dma_addr_t dst = dma_dest;
+       dma_addr_t src = dma_src;
+       size_t total_len = len;
+       int num_descs, idx, i;
+
+       if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+               return NULL;
+
+       num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+       if (likely(num_descs) &&
+           ioat_check_space_lock(ioat_chan, num_descs) == 0)
+               idx = ioat_chan->head;
+       else
+               return NULL;
+       i = 0;
+       do {
+               size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);
+
+               desc = ioat_get_ring_ent(ioat_chan, idx + i);
+               hw = desc->hw;
+
+               hw->size = copy;
+               hw->ctl = 0;
+               hw->src_addr = src;
+               hw->dst_addr = dst;
+
+               len -= copy;
+               dst += copy;
+               src += copy;
+               dump_desc_dbg(ioat_chan, desc);
+       } while (++i < num_descs);
+
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       hw->ctl_f.compl_write = 1;
+       dump_desc_dbg(ioat_chan, desc);
+       /* we leave the channel locked to ensure in-order submission */
+
+       return &desc->txd;
+}
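+
+/*
+ * Usage sketch (an assumption about the dmaengine glue, mirroring how
+ * Linux clients drive this; not part of this file):
+ *
+ *     tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len,
+ *                                               DMA_PREP_INTERRUPT);
+ *     if (tx) {
+ *             cookie = dmaengine_submit(tx);
+ *             dma_async_issue_pending(chan);
+ *     }
+ *
+ * The prep call above returns with the ring lock still held; the
+ * ->tx_submit() hook run by dmaengine_submit() advances the ring head
+ * and unlocks, which is what "we leave the channel locked" refers to.
+ */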
+
+static struct dma_async_tx_descriptor *
+__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
+                     dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
+                     size_t len, unsigned long flags)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+       struct ioat_ring_ent *compl_desc;
+       struct ioat_ring_ent *desc;
+       struct ioat_ring_ent *ext;
+       size_t total_len = len;
+       struct ioat_xor_descriptor *xor;
+       struct ioat_xor_ext_descriptor *xor_ex = NULL;
+       struct ioat_dma_descriptor *hw;
+       int num_descs, with_ext, idx, i;
+       u32 offset = 0;
+       u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
+
+       BUG_ON(src_cnt < 2);
+
+       num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+       /* we need 2x the number of descriptors to cover more than 5
+        * sources
+        */
+       if (src_cnt > 5) {
+               with_ext = 1;
+               num_descs *= 2;
+       } else
+               with_ext = 0;
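+
+       /*
+        * E.g. an 8-source xor whose length spans two transfer-size
+        * chunks reserves num_descs = 2 * 2 = 4 ring slots (each chunk
+        * consumes a base + extended descriptor pair), plus the null
+        * descriptor accounted for below.
+        */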
+
+       /* completion writes from the raid engine may pass completion
+        * writes from the legacy engine, so we need one extra null
+        * (legacy) descriptor to ensure all completion writes arrive in
+        * order.
+        */
+       if (likely(num_descs) &&
+           ioat_check_space_lock(ioat_chan, num_descs+1) == 0)
+               idx = ioat_chan->head;
+       else
+               return NULL;
+       i = 0;
+       do {
+               struct ioat_raw_descriptor *descs[2];
+               size_t xfer_size = min_t(size_t,
+                                        len, 1 << ioat_chan->xfercap_log);
+               int s;
+
+               desc = ioat_get_ring_ent(ioat_chan, idx + i);
+               xor = desc->xor;
+
+               /* save a branch by unconditionally retrieving the
+                * extended descriptor; xor_set_src() knows not to write
+                * to it in the single descriptor case
+                */
+               ext = ioat_get_ring_ent(ioat_chan, idx + i + 1);
+               xor_ex = ext->xor_ex;
+
+               descs[0] = (struct ioat_raw_descriptor *) xor;
+               descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+               for (s = 0; s < src_cnt; s++)
+                       xor_set_src(descs, src[s], offset, s);
+               xor->size = xfer_size;
+               xor->dst_addr = dest + offset;
+               xor->ctl = 0;
+               xor->ctl_f.op = op;
+               xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
+
+               len -= xfer_size;
+               offset += xfer_size;
+               dump_desc_dbg(ioat_chan, desc);
+       } while ((i += 1 + with_ext) < num_descs);
+
+       /* last xor descriptor carries the unmap parameters and fence bit */
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       if (result)
+               desc->result = result;
+       xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+       /* completion descriptor carries interrupt bit */
+       compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
+       compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+       hw = compl_desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       hw->ctl_f.compl_write = 1;
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       dump_desc_dbg(ioat_chan, compl_desc);
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &compl_desc->txd;
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+              unsigned int src_cnt, size_t len, unsigned long flags)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+       if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+               return NULL;
+
+       return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+                   unsigned int src_cnt, size_t len,
+                   enum sum_check_flags *result, unsigned long flags)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+       if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+               return NULL;
+
+       /* the cleanup routine only sets bits on validate failure; it
+        * does not clear bits on validate success, so clear it here
+        */
+       *result = 0;
+
+       return __ioat_prep_xor_lock(chan, result, src[0], &src[1],
+                                    src_cnt - 1, len, flags);
+}
+
+static void
+dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc,
+                struct ioat_ring_ent *ext)
+{
+       struct device *dev = to_dev(ioat_chan);
+       struct ioat_pq_descriptor *pq = desc->pq;
+       struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+       struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+       int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+       int i;
+
+       dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+               " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
+               " src_cnt: %d)\n",
+               desc_id(desc), (unsigned long long) desc->txd.phys,
+               (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+               desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op,
+               pq->ctl_f.int_en, pq->ctl_f.compl_write,
+               pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+               pq->ctl_f.src_cnt);
+       for (i = 0; i < src_cnt; i++)
+               dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+                       (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+       dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+       dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+       dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
+}
+
+static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan,
+                              struct ioat_ring_ent *desc)
+{
+       struct device *dev = to_dev(ioat_chan);
+       struct ioat_pq_descriptor *pq = desc->pq;
+       struct ioat_raw_descriptor *descs[] = { (void *)pq,
+                                               (void *)pq,
+                                               (void *)pq };
+       int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
+       int i;
+
+       if (desc->sed) {
+               descs[1] = (void *)desc->sed->hw;
+               descs[2] = (void *)desc->sed->hw + 64;
+       }
+
+       dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+               " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
+               " src_cnt: %d)\n",
+               desc_id(desc), (unsigned long long) desc->txd.phys,
+               (unsigned long long) pq->next,
+               desc->txd.flags, pq->size, pq->ctl,
+               pq->ctl_f.op, pq->ctl_f.int_en,
+               pq->ctl_f.compl_write,
+               pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+               pq->ctl_f.src_cnt);
+       for (i = 0; i < src_cnt; i++) {
+               dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+                       (unsigned long long) pq16_get_src(descs, i),
+                       pq->coef[i]);
+       }
+       dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+       dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+
+static struct dma_async_tx_descriptor *
+__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+                    const dma_addr_t *dst, const dma_addr_t *src,
+                    unsigned int src_cnt, const unsigned char *scf,
+                    size_t len, unsigned long flags)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+       struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+       struct ioat_ring_ent *compl_desc;
+       struct ioat_ring_ent *desc;
+       struct ioat_ring_ent *ext;
+       size_t total_len = len;
+       struct ioat_pq_descriptor *pq;
+       struct ioat_pq_ext_descriptor *pq_ex = NULL;
+       struct ioat_dma_descriptor *hw;
+       u32 offset = 0;
+       u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+       int i, s, idx, with_ext, num_descs;
+       int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0;
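+
+       /*
+        * cb32 flags pre-v3.3 hardware, which needs the extra null
+        * completion descriptor described below; v3.3+ can signal
+        * completion directly from the pq descriptor (the !cb32 branch).
+        */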
+
+       dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
+       /* the engine requires at least two sources (we provide
+        * at least 1 implied source in the DMA_PREP_CONTINUE case)
+        */
+       BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+       num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+       /* we need 2x the number of descriptors to cover more than 3
+        * sources (we need 1 extra source in the q-only continuation
+        * case and 3 extra sources in the p+q continuation case)
+        */
+       if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
+           (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
+               with_ext = 1;
+               num_descs *= 2;
+       } else
+               with_ext = 0;
+
+       /* completion writes from the raid engine may pass completion
+        * writes from the legacy engine, so we need one extra null
+        * (legacy) descriptor to ensure all completion writes arrive in
+        * order.
+        */
+       if (likely(num_descs) &&
+           ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0)
+               idx = ioat_chan->head;
+       else
+               return NULL;
+       i = 0;
+       do {
+               struct ioat_raw_descriptor *descs[2];
+               size_t xfer_size = min_t(size_t, len,
+                                        1 << ioat_chan->xfercap_log);
+
+               desc = ioat_get_ring_ent(ioat_chan, idx + i);
+               pq = desc->pq;
+
+               /* save a branch by unconditionally retrieving the
+                * extended descriptor pq_set_src() knows not to write
+                * to it in the single descriptor case
+                */
+               ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext);
+               pq_ex = ext->pq_ex;
+
+               descs[0] = (struct ioat_raw_descriptor *) pq;
+               descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+               for (s = 0; s < src_cnt; s++)
+                       pq_set_src(descs, src[s], offset, scf[s], s);
+
+               /* see the comment for dma_maxpq in include/linux/dmaengine.h */
+               if (dmaf_p_disabled_continue(flags))
+                       pq_set_src(descs, dst[1], offset, 1, s++);
+               else if (dmaf_continue(flags)) {
+                       pq_set_src(descs, dst[0], offset, 0, s++);
+                       pq_set_src(descs, dst[1], offset, 1, s++);
+                       pq_set_src(descs, dst[1], offset, 0, s++);
+               }
+               pq->size = xfer_size;
+               pq->p_addr = dst[0] + offset;
+               pq->q_addr = dst[1] + offset;
+               pq->ctl = 0;
+               pq->ctl_f.op = op;
+               /* we turn on descriptor write back error status */
+               if (ioat_dma->cap & IOAT_CAP_DWBES)
+                       pq->ctl_f.wb_en = result ? 1 : 0;
+               pq->ctl_f.src_cnt = src_cnt_to_hw(s);
+               pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+               pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+               len -= xfer_size;
+               offset += xfer_size;
+       } while ((i += 1 + with_ext) < num_descs);
+
+       /* last pq descriptor carries the unmap parameters and fence bit */
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       if (result)
+               desc->result = result;
+       pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       dump_pq_desc_dbg(ioat_chan, desc, ext);
+
+       if (!cb32) {
+               pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+               pq->ctl_f.compl_write = 1;
+               compl_desc = desc;
+       } else {
+               /* completion descriptor carries interrupt bit */
+               compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
+               compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+               hw = compl_desc->hw;
+               hw->ctl = 0;
+               hw->ctl_f.null = 1;
+               hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+               hw->ctl_f.compl_write = 1;
+               hw->size = NULL_DESC_BUFFER_SIZE;
+               dump_desc_dbg(ioat_chan, compl_desc);
+       }
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &compl_desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
+                      const dma_addr_t *dst, const dma_addr_t *src,
+                      unsigned int src_cnt, const unsigned char *scf,
+                      size_t len, unsigned long flags)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+       struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+       struct ioat_ring_ent *desc;
+       size_t total_len = len;
+       struct ioat_pq_descriptor *pq;
+       u32 offset = 0;
+       u8 op;
+       int i, s, idx, num_descs;
+
+       /* this function is only called with 9-16 sources */
+       op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
+
+       dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
+
+       num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+
+       /*
+        * 16 source pq is only available on cb3.3, which does not have
+        * the completion write hw bug, so no extra null descriptor is
+        * needed.
+        */
+       if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0)
+               idx = ioat_chan->head;
+       else
+               return NULL;
+
+       i = 0;
+
+       do {
+               struct ioat_raw_descriptor *descs[4];
+               size_t xfer_size = min_t(size_t, len,
+                                        1 << ioat_chan->xfercap_log);
+
+               desc = ioat_get_ring_ent(ioat_chan, idx + i);
+               pq = desc->pq;
+
+               descs[0] = (struct ioat_raw_descriptor *) pq;
+
+               desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3);
+               if (!desc->sed) {
+                       dev_err(to_dev(ioat_chan),
+                               "%s: no free sed entries\n", __func__);
+                       return NULL;
+               }
+
+               pq->sed_addr = desc->sed->dma;
+               desc->sed->parent = desc;
+
+               descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
+               descs[2] = (void *)descs[1] + 64;
+
+               for (s = 0; s < src_cnt; s++)
+                       pq16_set_src(descs, src[s], offset, scf[s], s);
+
+               /* see the comment for dma_maxpq in include/linux/dmaengine.h */
+               if (dmaf_p_disabled_continue(flags))
+                       pq16_set_src(descs, dst[1], offset, 1, s++);
+               else if (dmaf_continue(flags)) {
+                       pq16_set_src(descs, dst[0], offset, 0, s++);
+                       pq16_set_src(descs, dst[1], offset, 1, s++);
+                       pq16_set_src(descs, dst[1], offset, 0, s++);
+               }
+
+               pq->size = xfer_size;
+               pq->p_addr = dst[0] + offset;
+               pq->q_addr = dst[1] + offset;
+               pq->ctl = 0;
+               pq->ctl_f.op = op;
+               pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
+               /* we turn on descriptor write back error status */
+               if (ioat_dma->cap & IOAT_CAP_DWBES)
+                       pq->ctl_f.wb_en = result ? 1 : 0;
+               pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+               pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+               len -= xfer_size;
+               offset += xfer_size;
+       } while (++i < num_descs);
+
+       /* last pq descriptor carries the unmap parameters and fence bit */
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       if (result)
+               desc->result = result;
+       pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+       /* with cb3.3 we should be able to do completion w/o a null desc */
+       pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       pq->ctl_f.compl_write = 1;
+
+       dump_pq16_desc_dbg(ioat_chan, desc);
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &desc->txd;
+}
+
+static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
+{
+       if (dmaf_p_disabled_continue(flags))
+               return src_cnt + 1;
+       else if (dmaf_continue(flags))
+               return src_cnt + 3;
+       else
+               return src_cnt;
+}
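+
+/*
+ * E.g. a DMA_PREP_CONTINUE pq operation with 6 explicit sources counts
+ * as 9 once the implied re-injected P/Q sources are added (see the
+ * dmaf_continue() handling in __ioat_prep_pq_lock()), so the callers
+ * below route it to the 16-source path.
+ */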
+
+struct dma_async_tx_descriptor *
+ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+             unsigned int src_cnt, const unsigned char *scf, size_t len,
+             unsigned long flags)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+       if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+               return NULL;
+
+       /* specify valid address for disabled result */
+       if (flags & DMA_PREP_PQ_DISABLE_P)
+               dst[0] = dst[1];
+       if (flags & DMA_PREP_PQ_DISABLE_Q)
+               dst[1] = dst[0];
+
+       /* handle the single source multiply case from the raid6
+        * recovery path
+        */
+       if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
+               dma_addr_t single_source[2];
+               unsigned char single_source_coef[2];
+
+               BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+               single_source[0] = src[0];
+               single_source[1] = src[0];
+               single_source_coef[0] = scf[0];
+               single_source_coef[1] = 0;
+
+               return src_cnt_flags(src_cnt, flags) > 8 ?
+                       __ioat_prep_pq16_lock(chan, NULL, dst, single_source,
+                                              2, single_source_coef, len,
+                                              flags) :
+                       __ioat_prep_pq_lock(chan, NULL, dst, single_source, 2,
+                                            single_source_coef, len, flags);
+
+       } else {
+               return src_cnt_flags(src_cnt, flags) > 8 ?
+                       __ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
+                                              scf, len, flags) :
+                       __ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt,
+                                            scf, len, flags);
+       }
+}
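+
+/*
+ * The single-source case above satisfies the engine's two-source
+ * minimum by feeding src[0] twice with coefficients {scf[0], 0}; the
+ * zero coefficient contributes nothing to the GF(256) sum, so Q comes
+ * out as scf[0] * src[0], the multiply the raid6 recovery path wants.
+ */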
+
+struct dma_async_tx_descriptor *
+ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+                 unsigned int src_cnt, const unsigned char *scf, size_t len,
+                 enum sum_check_flags *pqres, unsigned long flags)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+       if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+               return NULL;
+
+       /* specify valid address for disabled result */
+       if (flags & DMA_PREP_PQ_DISABLE_P)
+               pq[0] = pq[1];
+       if (flags & DMA_PREP_PQ_DISABLE_Q)
+               pq[1] = pq[0];
+
+       /* the cleanup routine only sets bits on validate failure; it
+        * does not clear bits on validate success, so clear it here
+        */
+       *pqres = 0;
+
+       return src_cnt_flags(src_cnt, flags) > 8 ?
+               __ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
+                                      flags) :
+               __ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+                                    flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+                unsigned int src_cnt, size_t len, unsigned long flags)
+{
+       unsigned char scf[MAX_SCF];
+       dma_addr_t pq[2];
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+       if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+               return NULL;
+
+       if (src_cnt > MAX_SCF)
+               return NULL;
+
+       memset(scf, 0, src_cnt);
+       pq[0] = dst;
+       flags |= DMA_PREP_PQ_DISABLE_Q;
+       pq[1] = dst; /* specify valid address for disabled result */
+
+       return src_cnt_flags(src_cnt, flags) > 8 ?
+               __ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
+                                      flags) :
+               __ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
+                                    flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+                    unsigned int src_cnt, size_t len,
+                    enum sum_check_flags *result, unsigned long flags)
+{
+       unsigned char scf[MAX_SCF];
+       dma_addr_t pq[2];
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+       if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+               return NULL;
+
+       if (src_cnt > MAX_SCF)
+               return NULL;
+
+       /* the cleanup routine only sets bits on validate failure; it
+        * does not clear bits on validate success, so clear it here
+        */
+       *result = 0;
+
+       memset(scf, 0, src_cnt);
+       pq[0] = src[0];
+       flags |= DMA_PREP_PQ_DISABLE_Q;
+       pq[1] = pq[0]; /* specify valid address for disabled result */
+
+       return src_cnt_flags(src_cnt, flags) > 8 ?
+               __ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
+                                      scf, len, flags) :
+               __ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
+                                    scf, len, flags);
+}
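+
+/*
+ * Note on the two pqxor variants above: with DMA_PREP_PQ_DISABLE_Q set,
+ * the pq engine's P result is a plain xor of the sources, so the pq
+ * path can service xor operations with more sources than the xor
+ * descriptors support; the all-zero scf[] only feeds the disabled Q
+ * computation.
+ */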
+
+struct dma_async_tx_descriptor *
+ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
+{
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+       struct ioat_ring_ent *desc;
+       struct ioat_dma_descriptor *hw;
+
+       if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+               return NULL;
+
+       if (ioat_check_space_lock(ioat_chan, 1) == 0)
+               desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
+       else
+               return NULL;
+
+       hw = desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = 1;
+       hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       hw->ctl_f.compl_write = 1;
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       hw->src_addr = 0;
+       hw->dst_addr = 0;
+
+       desc->txd.flags = flags;
+       desc->len = 1;
+
+       dump_desc_dbg(ioat_chan, desc);
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &desc->txd;
+}
+
diff --git a/kern/drivers/dma/ioat/registers.h b/kern/drivers/dma/ioat/registers.h
new file mode 100644 (file)
index 0000000..99c1c24
--- /dev/null
@@ -0,0 +1,270 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define IOAT_PCI_DMACTRL_OFFSET                        0x48
+#define IOAT_PCI_DMACTRL_DMA_EN                        0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN                        0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET              0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET           0x148
+#define IOAT_PCI_CHANERR_INT_OFFSET            0x180
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET                0x184
+
+/* PCIe config registers */
+
+/* EXPCAPID + N */
+#define IOAT_DEVCTRL_OFFSET                    0x8
+/* relaxed ordering enable */
+#define IOAT_DEVCTRL_ROE                       0x10
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET                    0x00    /*  8-bit */
+
+#define IOAT_XFERCAP_OFFSET                    0x01    /*  8-bit */
+#define IOAT_XFERCAP_4KB                       12
+#define IOAT_XFERCAP_8KB                       13
+#define IOAT_XFERCAP_16KB                      14
+#define IOAT_XFERCAP_32KB                      15
+#define IOAT_XFERCAP_32GB                      0
+
+#define IOAT_GENCTRL_OFFSET                    0x02    /*  8-bit */
+#define IOAT_GENCTRL_DEBUG_EN                  0x01
+
+#define IOAT_INTRCTRL_OFFSET                   0x03    /*  8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN            0x01    /* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS               0x02    /* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT                      0x04    /* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL      0x08    /* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET                 0x04    /* Each bit is a channel */
+
+#define IOAT_VER_OFFSET                                0x08    /*  8-bit */
+#define IOAT_VER_MAJOR_MASK                    0xF0
+#define IOAT_VER_MINOR_MASK                    0x0F
+#define GET_IOAT_VER_MAJOR(x)                  (((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x)                  ((x) & IOAT_VER_MINOR_MASK)
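+
+/* e.g. a version byte of 0x34 decodes as major 3, minor 4; init.c's
+ * IOAT_VER_3_4 check compares against this raw byte.
+ */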
+
+#define IOAT_PERPORTOFFSET_OFFSET              0x0A    /* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET                  0x0C    /* 16-bit */
+#define IOAT_INTRDELAY_MASK                    0x3FFF  /* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT         0x8000  /* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET              0x0E    /* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE       0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED            0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS              0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING          0x0008
+
+#define IOAT_DMA_CAP_OFFSET                    0x10    /* 32-bit */
+#define IOAT_CAP_PAGE_BREAK                    0x00000001
+#define IOAT_CAP_CRC                           0x00000002
+#define IOAT_CAP_SKIP_MARKER                   0x00000004
+#define IOAT_CAP_DCA                           0x00000010
+#define IOAT_CAP_CRC_MOVE                      0x00000020
+#define IOAT_CAP_FILL_BLOCK                    0x00000040
+#define IOAT_CAP_APIC                          0x00000080
+#define IOAT_CAP_XOR                           0x00000100
+#define IOAT_CAP_PQ                            0x00000200
+#define IOAT_CAP_DWBES                         0x00002000
+#define IOAT_CAP_RAID16SS                      0x00020000
+#define IOAT_CAP_DPS                           0x00800000
+
+#define IOAT_PREFETCH_LIMIT_OFFSET             0x4C    /* CHWPREFLMT */
+
+#define IOAT_CHANNEL_MMIO_SIZE                 0x80    /* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET                   0x00    /* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK    0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN            0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE           0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL    0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN               0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN         0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN                0x0004
+#define IOAT_CHANCTRL_INT_REARM                        0x0001
+#define IOAT_CHANCTRL_RUN                      (IOAT_CHANCTRL_INT_REARM |\
+                                                IOAT_CHANCTRL_ERR_INT_EN |\
+                                                IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+                                                IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
+
+#define IOAT_DMA_COMP_OFFSET                   0x02    /* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1                       0x0001  /* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2                       0x0002  /* Compatibility with DMA version 2 */
+
+#define IOAT_CHANSTS_OFFSET            0x08    /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR                  0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR          0x8ULL
+#define IOAT_CHANSTS_STATUS    0x7ULL
+#define IOAT_CHANSTS_ACTIVE    0x0
+#define IOAT_CHANSTS_DONE      0x1
+#define IOAT_CHANSTS_SUSPENDED 0x2
+#define IOAT_CHANSTS_HALTED    0x3
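+
+/* Decoding sketch: on completion the channel writes this 64-bit status
+ * word to the address programmed in CHANCMP; bits 63:6 hold the last
+ * completed descriptor's address and the low bits its state, e.g.:
+ *
+ *     u64 status = *ioat_chan->completion;
+ *     u64 last = status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+ *     bool halted = (status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED;
+ *
+ * (ioat_chan->completion per struct ioatdma_chan in dma.h)
+ */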
+
+#define IOAT_CHAN_DMACOUNT_OFFSET      0x06    /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET         0x30   /* 32-bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET       0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET         0x00
+#define IOAT_DCA_VER_MAJOR_MASK     0xF0
+#define IOAT_DCA_VER_MINOR_MASK     0x0F
+
+#define IOAT_DCA_COMP_OFFSET        0x02
+#define IOAT_DCA_COMP_V1            0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET  0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET  0x06
+#define IOAT_PCI_CAPABILITY_MEMWR   0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
+#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET      0x10
+#define IOAT_DCA_GREQID_SIZE        0x04
+#define IOAT_DCA_GREQID_MASK        0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
+#define IOAT_DCA_GREQID_VALID       0x20000000
+#define IOAT_DCA_GREQID_LASTID      0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET    0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH  0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET    0x0E
+#define IOAT3_PCI_CONTROL_MEMWR     0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET     0x02
+
+#define IOAT1_CHAINADDR_OFFSET         0x0C    /* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET         0x10    /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver)             ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW     0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW     0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver)         ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH    0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH    0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver)                ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET           0x14    /*  8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET           0x04    /*  8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET                     0x20
+#define IOAT_CHANCMD_RESUME                    0x10
+#define IOAT_CHANCMD_ABORT                     0x08
+#define IOAT_CHANCMD_SUSPEND                   0x04
+#define IOAT_CHANCMD_APPEND                    0x02
+#define IOAT_CHANCMD_START                     0x01
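+
+/* e.g., suspending a channel, as dma.c does:
+ *     writeb(IOAT_CHANCMD_SUSPEND,
+ *            ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ioat_dma->version));
+ */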
+
+#define IOAT_CHANCMP_OFFSET                    0x18    /* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW                        0x18
+#define IOAT_CHANCMP_OFFSET_HIGH               0x1C
+
+#define IOAT_CDAR_OFFSET                       0x20    /* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW                   0x20
+#define IOAT_CDAR_OFFSET_HIGH                  0x24
+
+#define IOAT_CHANERR_OFFSET                    0x28    /* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR      0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR     0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR     0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR       0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR      0x0010
+#define IOAT_CHANERR_CHANCMD_ERR               0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR  0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR      0x0080
+#define IOAT_CHANERR_READ_DATA_ERR             0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR            0x0200
+#define IOAT_CHANERR_CONTROL_ERR       0x0400
+#define IOAT_CHANERR_LENGTH_ERR        0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR       0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR     0x2000
+#define IOAT_CHANERR_SOFT_ERR                  0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR          0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR          0x10000
+#define IOAT_CHANERR_XOR_Q_ERR                 0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR      0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+#define IOAT_CHANERR_RECOVER_MASK (IOAT_CHANERR_READ_DATA_ERR | \
+                                  IOAT_CHANERR_WRITE_DATA_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET               0x2C    /* 32-bit Channel Error Mask Register */
+
+#define IOAT_CHAN_DRSCTL_OFFSET                        0xB6
+#define IOAT_CHAN_DRSZ_4KB                     0x0000
+#define IOAT_CHAN_DRSZ_8KB                     0x0001
+#define IOAT_CHAN_DRSZ_2MB                     0x0009
+#define IOAT_CHAN_DRS_EN                       0x0100
+#define IOAT_CHAN_DRS_AUTOWRAP                 0x0200
+
+#define IOAT_CHAN_LTR_SWSEL_OFFSET             0xBC
+#define IOAT_CHAN_LTR_SWSEL_ACTIVE             0x0
+#define IOAT_CHAN_LTR_SWSEL_IDLE               0x1
+
+#define IOAT_CHAN_LTR_ACTIVE_OFFSET            0xC0
+#define IOAT_CHAN_LTR_ACTIVE_SNVAL             0x0000  /* 0 us */
+#define IOAT_CHAN_LTR_ACTIVE_SNLATSCALE                0x0800  /* 1us scale */
+#define IOAT_CHAN_LTR_ACTIVE_SNREQMNT          0x8000  /* snoop req enable */
+
+#define IOAT_CHAN_LTR_IDLE_OFFSET              0xC4
+#define IOAT_CHAN_LTR_IDLE_SNVAL               0x0258  /* 600 us */
+#define IOAT_CHAN_LTR_IDLE_SNLATSCALE          0x0800  /* 1us scale */
+#define IOAT_CHAN_LTR_IDLE_SNREQMNT            0x8000  /* snoop req enable */
+
+#endif /* _IOAT_REGISTERS_H_ */