cbdma: add support for Intel CBDMA/IOAT
author    Aditya Basu <mitthu@google.com>
          Sat, 17 Aug 2019 01:51:56 +0000 (21:51 -0400)
committer Barret Rhoden <brho@cs.berkeley.edu>
          Mon, 19 Aug 2019 16:39:09 +0000 (12:39 -0400)
* Creates the #cbdma device and a minimal hierarchy with these files:
    ktest - run the self-test
    stats - dump register values and driver information
    reset - write '1' to reset the cbdma
    iommu - turn IOMMU support on/off

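Typical interaction from the shell (illustrative):
    cat '#cbdma/ktest'        # run the self-test and print the result
    cat '#cbdma/stats'        # dump registers and driver state
    echo 1 > '#cbdma/reset'   # reset the device
    echo 1 > '#cbdma/iommu'   # route ucopy DMA through the IOMMU path
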
* Searches through all PCI devices for the IDs below. If a matching device
is found, then only that single function is registered:
    * Vendor ID: 0x8086, Device ID: 0x2021 (Skylake)
    * Vendor ID: 0x8086, Device ID: 0x2f20 (Haswell)
* If no CBDMA device is found, then the device will not attach (bind).

* The PCI BAR register pages are remapped with nocache
* A desc chain is populated which describes the DMA transfers
* On MSI interrupts, the driver acks the interrupt and re-enables
interrupts

* User-Space CBDMA (ucbdma)
    * desc addresses are converted to kaddrs and issued (IOMMU = off)
    * desc addresses are not converted to kaddrs (IOMMU = on)

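Illustrative user-space sketch of the ucopy flow (not part of this commit):
the driver treats the address of the buffer handed to write() as the location
of the struct ucbdma, so the struct itself is written. Struct layouts are
mirrored from cbdma.c; 0x8 is CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION
from cbdma_regs.h; src_buf/dst_buf are assumed user buffers.

    struct ucbdma *u = memalign(64, sizeof(struct ucbdma));

    memset(u, 0, sizeof(*u));
    u->ndesc = 1;
    u->desc.xfer_size = 64;
    u->desc.src_addr = (uint64_t) src_buf;
    u->desc.dest_addr = (uint64_t) dst_buf;
    u->desc.next_desc_addr = (uint64_t) &u->desc; /* one-entry ring */
    u->desc.descriptor_control = 0x8; /* write CHANCMP on completion */

    int fd = open("#cbdma/ucopy", O_WRONLY);

    write(fd, u, sizeof(struct ucbdma));
    /* on completion, the device writes CHANSTS into u->status */
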
Signed-off-by: Aditya Basu <mitthu@google.com>
[minor formatting touchups]
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
kern/drivers/dev/Kbuild
kern/drivers/dev/cbdma.c [new file with mode: 0644]
kern/include/cbdma_regs.h [new file with mode: 0644]

index b936073..9359432 100644
@@ -11,6 +11,7 @@ obj-y                                         += kprof.o
 obj-y                                          += mem.o
 obj-y                                          += mnt.o
 obj-y                                          += pci.o
+obj-y                                          += cbdma.o
 obj-y                                          += pipe.o
 obj-y                                          += proc.o
 obj-y                                          += random.o
diff --git a/kern/drivers/dev/cbdma.c b/kern/drivers/dev/cbdma.c
new file mode 100644
index 0000000..9ec2ce0
--- /dev/null
@@ -0,0 +1,954 @@
+/* Copyright (c) 2019 Google Inc
+ * Aditya Basu <mitthu@google.com>
+ * See LICENSE for details.
+ *
+ * Useful resources:
+ *   - Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2
+ *   - Purley Programmer's Guide
+ *
+ * Acronyms:
+ *   - IOAT: (Intel) I/O Acceleration Technology
+ *   - CBDMA: Crystal Beach DMA
+ *
+ * CBDMA Notes
+ * ===========
+ * Every CBDMA PCI function has one MMIO address space (so only BAR0). Each
+ * function can have multiple channels. Currently these devices only have one
+ * channel per function. This can be read from the CHANCNT register (8-bit)
+ * at offset 0x0.
+ *
+ * Each channel can be independently configured for DMA. The MMIO config space of
+ * every channel is 0x80 bytes. The first channel (or CHANNEL_0) starts at 0x80
+ * offset.
+ *
+ * CHAINADDR points to a descriptor (desc) ring buffer. More precisely it points
+ * to the first desc in the ring buffer. Each desc represents a single DMA
+ * operation. Look at "struct desc" for its structure.
+ *
+ * Each desc is 0x40 bytes (or 64 bytes) in size. A 4k page will be able to hold
+ * 4k/64 = 64 entries. Note that the lower 6 bits of CHAINADDR should be zero,
+ * so the first desc's address needs to be aligned accordingly. Page-aligning
+ * the first desc address works because 4k page-aligned addresses have the
+ * last 12 bits as zero.
+ *
+ * TODO
+ * ====
+ * *MAJOR*
+ *   - Update to the correct struct desc (from Linux kernel)
+ *   - Make the status field embedded in the channel struct (no ptr business)
+ *   - Add file for errors
+ *   - Add locks to guard desc access
+ *   - Freeze VA->PA page mappings till DMA is completed (esp. for ucbdma)
+ * *MINOR*
+ *   - Replace all CBDMA_* constants with IOAT_*
+ *   - Support multiple devices (currently only the first CBDMA device found
+ *     is initialized)
+ */
+
+#include <kmalloc.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <error.h>
+#include <net/ip.h>
+#include <linux_compat.h>
+#include <arch/pci.h>
+#include <page_alloc.h>
+#include <pmap.h>
+#include <cbdma_regs.h>
+#include <arch/pci_regs.h>
+
+#define NDESC 1 // number of descs to initialize
+#define BUFFERSZ 8192
+
+struct dev                cbdmadevtab;
+static struct pci_device  *pci;
+static void               *mmio;
+static uint64_t           mmio_phy; /* physical addr */
+static uint32_t           mmio_sz;
+static uint8_t            chancnt; /* Total number of channels per function */
+static bool               iommu_enabled;
+static bool               cbdma_break_loop; /* toggle_foo functionality */
+
+/* PCIe Config Space; from Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2 */
+enum {
+       DEVSTS = 0x9a, // 16-bit
+       PMCSR  = 0xe4, // 32-bit
+
+       DMAUNCERRSTS = 0x148, // 32-bit (DMA Cluster Uncorrectable Error Status)
+       DMAUNCERRMSK = 0x14c, // 32-bit
+       DMAUNCERRSEV = 0x150, // 32-bit
+       DMAUNCERRPTR = 0x154, // 8-bit
+       DMAGLBERRPTR = 0x160, // 8-bit
+
+       CHANERR_INT    = 0x180, // 32-bit
+       CHANERRMSK_INT = 0x184, // 32-bit
+       CHANERRSEV_INT = 0x188, // 32-bit
+       CHANERRPTR     = 0x18c, // 8-bit
+};
+
+/* QID Path */
+enum {
+       Qdir           = 0,
+       Qcbdmaktest    = 1,
+       Qcbdmastats    = 2,
+       Qcbdmareset    = 3,
+       Qcbdmaucopy    = 4,
+       Qcbdmaiommu    = 5,
+};
+
+/* supported ioat devices */
+enum {
+       ioat2021 = (0x2021 << 16) | 0x8086,
+       ioat2f20 = (0x2f20 << 16) | 0x8086,
+};
+
+static struct dirtab cbdmadir[] = {
+       {".",         {Qdir, 0, QTDIR}, 0, 0555},
+       {"ktest",     {Qcbdmaktest, 0, QTFILE}, 0, 0555},
+       {"stats",     {Qcbdmastats, 0, QTFILE}, 0, 0555},
+       {"reset",     {Qcbdmareset, 0, QTFILE}, 0, 0755},
+       {"ucopy",     {Qcbdmaucopy, 0, QTFILE}, 0, 0755},
+       {"iommu",     {Qcbdmaiommu, 0, QTFILE}, 0, 0755},
+};
+
+/* Descriptor structure as defined in the programmer's guide.
+ * It describes a single DMA transfer.
+ */
+struct desc {
+       uint32_t  xfer_size;
+       uint32_t  descriptor_control;
+       uint64_t  src_addr;
+       uint64_t  dest_addr;
+       uint64_t  next_desc_addr;
+       uint64_t  next_source_address;
+       uint64_t  next_destination_address;
+       uint64_t  reserved0;
+       uint64_t  reserved1;
+} __attribute__((packed));
+
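+/* Per the notes above: each desc must be exactly 0x40 bytes, and the first
+ * desc's address must have its low 6 bits clear (CHAINADDR alignment). */
+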
+/* The channels are indexed starting from 0 */
+static struct channel {
+       uint8_t                number; // channel number
+       struct desc            *pdesc; // desc ptr
+       int                    ndesc;  // num. of desc
+       uint64_t               *status; // reg: CHANSTS, needs to be 64B aligned
+       uint8_t                ver;    // reg: CBVER
+
+       /* DEPRECATED: MMIO address space; from Intel Xeon E7 2800/4800/8800
+        * Datasheet Vol. 2. Every channel is 0x80 bytes in size.
+        */
+       uint8_t  chancmd;
+       uint8_t  xrefcap;
+       uint16_t chanctrl;
+       uint16_t dmacount;
+       uint32_t chanerr;
+       uint64_t chansts;
+       uint64_t chainaddr;
+} cbdmadev, channel0;
+
+#define KTEST_SIZE 64
+static struct {
+       char    printbuf[4096];
+       char    src[KTEST_SIZE];
+       char    dst[KTEST_SIZE];
+       char    srcfill;
+       char    dstfill;
+} ktest;    /* TODO: needs locking */
+
+/* struct passed from the userspace */
+struct ucbdma {
+       struct desc desc;
+       uint64_t    status;
+       uint16_t    ndesc;
+};
+
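+/* Note: 'desc' must stay the first field; issue_dma_kaddr() casts a struct
+ * ucbdma pointer directly to a struct desc pointer. */
+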
+/* for debugging via kfunc; break out of infinite polling loops */
+void toggle_cbdma_break_loop(void)
+{
+       cbdma_break_loop = !cbdma_break_loop;
+       printk("cbdma: cbdma_break_loop = %d\n", cbdma_break_loop);
+}
+
+/* Function definitions start here */
+static inline bool is_initialized(void)
+{
+       return pci && mmio;
+}
+
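+/* Channel N's registers start at BAR0 offset (N + 1) * IOAT_CHANNEL_MMIO_SIZE:
+ * the device-level registers occupy the first 0x80 bytes, so channel 0 begins
+ * at offset 0x80. */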
+static void *get_register(struct channel *c, int offset)
+{
+       uint64_t base = (c->number + 1) * IOAT_CHANNEL_MMIO_SIZE;
+
+       return (char *) mmio + base + offset;
+}
+
+static char *devname(void)
+{
+       return cbdmadevtab.name;
+}
+
+static struct chan *cbdmaattach(char *spec)
+{
+       if (!is_initialized())
+               error(ENODEV, "no cbdma device detected");
+       return devattach(devname(), spec);
+}
+
+struct walkqid *cbdmawalk(struct chan *c, struct chan *nc, char **name,
+                        unsigned int nname)
+{
+       return devwalk(c, nc, name, nname, cbdmadir,
+                      ARRAY_SIZE(cbdmadir), devgen);
+}
+
+static size_t cbdmastat(struct chan *c, uint8_t *dp, size_t n)
+{
+       return devstat(c, dp, n, cbdmadir, ARRAY_SIZE(cbdmadir), devgen);
+}
+
+/* return string representation of chansts */
+char *cbdma_str_chansts(uint64_t chansts)
+{
+       char *status = "unrecognized status";
+
+       switch (chansts & IOAT_CHANSTS_STATUS) {
+       case IOAT_CHANSTS_ACTIVE:
+               status = "ACTIVE";
+               break;
+       case IOAT_CHANSTS_DONE:
+               status = "DONE";
+               break;
+       case IOAT_CHANSTS_SUSPENDED:
+               status = "SUSPENDED";
+               break;
+       case IOAT_CHANSTS_HALTED:
+               status = "HALTED";
+               break;
+       case IOAT_CHANSTS_ARMED:
+               status = "ARMED";
+               break;
+       default:
+               break;
+       }
+       return status;
+}
+
+/* print descriptors on console (for debugging) */
+static void dump_desc(struct desc *d, int count)
+{
+       printk("dumping descriptors (count = %d):\n", count);
+
+       while (count > 0) {
+               printk("desc: %p, size: %lu bytes\n",
+                       d, sizeof(struct desc));
+               printk("[32] desc->xfer_size: 0x%x\n",
+                       d->xfer_size);
+               printk("[32] desc->descriptor_control: 0x%x\n",
+                       d->descriptor_control);
+               printk("[64] desc->src_addr: %p\n",
+                       d->src_addr);
+               printk("[64] desc->dest_addr: %p\n",
+                       d->dest_addr);
+               printk("[64] desc->next_desc_addr: %p\n",
+                       d->next_desc_addr);
+               printk("[64] desc->next_source_address: %p\n",
+                       d->next_source_address);
+               printk("[64] desc->next_destination_address: %p\n",
+                       d->next_destination_address);
+               printk("[64] desc->reserved0: %p\n",
+                       d->reserved0);
+               printk("[64] desc->reserved1: %p\n",
+                       d->reserved1);
+
+               count--;
+               if (count > 0)
+                       d = (struct desc *) KADDR(d->next_desc_addr);
+               printk("\n");
+       }
+}
+
+/* Initialize the desc ring.
+ *
+ * - Can be called multiple times, with different "ndesc" values.
+ * - NOTE: We only create _one_ valid desc. The next field points back to
+ *   itself (ring buffer).
+ */
+static void init_desc(struct channel *c, int ndesc)
+{
+       struct desc *d;
+       const int max_ndesc = PGSIZE / sizeof(struct desc);
+
+       /* sanity checks */
+       if (ndesc > max_ndesc) {
+               printk("cbdma: allocating only %d desc instead of %d desc\n",
+                       max_ndesc, ndesc);
+               ndesc = max_ndesc;
+       }
+
+       c->ndesc = ndesc;
+
+       /* allocate pages for descriptors, last 6-bits must be zero */
+       if (!c->pdesc)
+               c->pdesc = kpage_zalloc_addr();
+
+       if (!c->pdesc) { /* TODO: use error(), which does not return */
+               printk("cbdma: cannot alloc page for desc\n");
+               return; /* TODO: return "false" */
+       }
+
+       /* preparing descriptors */
+       d = c->pdesc;
+       d->xfer_size = 1;
+       d->descriptor_control = CBDMA_DESC_CTRL_NULL_DESC;
+       d->next_desc_addr = PADDR(d);
+}
+
+/* struct channel is only used for get_register */
+static inline void cleanup_post_copy(struct channel *c)
+{
+       uint64_t value;
+
+       /* mmio_reg: DMACOUNT */
+       value = read16(get_register(c, IOAT_CHAN_DMACOUNT_OFFSET));
+       if (value != 0) {
+               printk("cbdma: info: DMACOUNT = %d\n", value); /* should be 0 */
+               write16(0, get_register(c, IOAT_CHAN_DMACOUNT_OFFSET));
+       }
+
+       /* mmio_reg: CHANERR */
+       value = read32(get_register(c, IOAT_CHANERR_OFFSET));
+       if (value != 0) {
+               printk("cbdma: error: CHANERR = 0x%x\n", value);
+               write32(value, get_register(c, IOAT_CHANERR_OFFSET));
+       }
+
+       /* ack errors */
+       if (ACCESS_PCIE_CONFIG_SPACE) {
+               /* PCIe_reg: CHANERR_INT */
+               value = pcidev_read32(pci, CHANERR_INT);
+               if (value != 0) {
+                       printk("cbdma: error: CHANERR_INT = 0x%x\n", value);
+                       pcidev_write32(pci, CHANERR_INT, value);
+               }
+
+               /* PCIe_reg: DMAUNCERRSTS */
+               value = pcidev_read32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET);
+               if (value != 0) {
+                       printk("cbdma: error: DMAUNCERRSTS = 0x%x\n", value);
+                       pcidev_write32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET,
+                                      value);
+               }
+       }
+}
+
+/* struct channel is only used for get_register */
+static inline void perform_dma(struct channel *c, physaddr_t completion_sts,
+                              physaddr_t desc, uint16_t count)
+{
+       void __iomem *offset;
+
+       /* Set channel completion register where CBDMA will write content of
+        * CHANSTS register upon successful DMA completion or error condition
+        */
+       offset = get_register(c, IOAT_CHANCMP_OFFSET);
+       write64(completion_sts, offset);
+
+       /* write location of the first desc to register CHAINADDR */
+       offset = get_register(c, IOAT_CHAINADDR_OFFSET(c->ver));
+       write64(desc, offset);
+       wmb_f();
+
+       /* writing valid number of descs: starts the DMA */
+       offset = get_register(c, IOAT_CHAN_DMACOUNT_OFFSET);
+       write16(count, offset);
+}
+
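+/* Spin on the in-memory completion status that the device writes back (see
+ * the CHANCMP programming in perform_dma()) until the channel leaves the
+ * ACTIVE state. */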
+static inline void wait_for_dma_completion(uint64_t *cmpsts)
+{
+       uint64_t sts;
+
+       do {
+               cpu_relax();
+               sts = *cmpsts;
+               if (cbdma_break_loop) {
+                       printk("cbdma: cmpsts: %p = 0x%llx\n", cmpsts, sts);
+                       break;
+               }
+       } while ((sts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+/* cbdma_ktest: performs a functional test on the CBDMA
+ *
+ * - Fills the static ktest.src and ktest.dst buffers
+ * - Prepares a descriptor for the DMA transfer (must be aligned)
+ * - Initiates the transfer and waits for completion
+ * - Results are printed by open_ktest()
+ */
+static void cbdma_ktest(void)
+{
+       struct desc *d;
+       struct channel *c = &channel0;
+
+       /* initialize src and dst address */
+       memset(ktest.src, ktest.srcfill, KTEST_SIZE);
+       memset(ktest.dst, ktest.dstfill, KTEST_SIZE);
+       ktest.src[KTEST_SIZE-1] = '\0';
+       ktest.dst[KTEST_SIZE-1] = '\0';
+
+       /* for subsequent ktests */
+       ktest.srcfill += 1;
+
+       /* preparing descriptors */
+       d = channel0.pdesc;
+       d->xfer_size            = (uint32_t) KTEST_SIZE;
+       d->src_addr             = (uint64_t) PADDR(ktest.src);
+       d->dest_addr            = (uint64_t) PADDR(ktest.dst);
+       d->descriptor_control   = CBDMA_DESC_CTRL_INTR_ON_COMPLETION |
+                                 CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION;
+
+       memset(c->status, 0, sizeof(*c->status));
+
+       /* perform actual DMA */
+       perform_dma(c, PADDR(c->status), PADDR(c->pdesc), 1);
+       wait_for_dma_completion(c->status);
+       cleanup_post_copy(c);
+}
+
+/* convert a userspace pointer to kaddr based pointer
+ * TODO: this is dangerous and the pages are not pinned. Debugging only! */
+static inline void *uptr_to_kptr(void *ptr)
+{
+       return (void *) uva2kva(current, ptr, 1, PROT_WRITE);
+}
+
+/* function that uses kernel addresses to perform DMA.
+ * Note: does not perform error checks for src / dest addr.
+ * TODO: this only works if ktest has not been run, and even then it fails on
+ *       alternate runs. Likely some error in setting up the desc from
+ *       userspace.
+ */
+static void issue_dma_kaddr(struct ucbdma *u)
+{
+       struct ucbdma *u_kaddr = uptr_to_kptr(u);
+       /* first field is struct desc */
+       struct desc *d = (struct desc *) u_kaddr;
+       struct channel *c = &channel0;
+
+       if (!u_kaddr) {
+               printk("[kern] cannot get kaddr for useraddr: %p\n", u);
+               return;
+       }
+       printk("[kern] ucbdma: user: %p kern: %p\n", u, u_kaddr);
+
+       /* preparing descriptors */
+       d->src_addr   = (uint64_t) PADDR(uptr_to_kptr((void*) d->src_addr));
+       d->dest_addr  = (uint64_t) PADDR(uptr_to_kptr((void*) d->dest_addr));
+       d->next_desc_addr = (uint64_t)
+                           PADDR(uptr_to_kptr((void*) d->next_desc_addr));
+
+       /* perform actual DMA */
+       perform_dma(c, PADDR(&u_kaddr->status), PADDR(d), u_kaddr->ndesc);
+       wait_for_dma_completion(&u_kaddr->status);
+       cleanup_post_copy(c);
+}
+
+/* function that uses virtual (process) addresses to perform DMA; IOMMU = ON
+ * TODO: Verify once the IOMMU is setup and enabled.
+ */
+static void issue_dma_vaddr(struct ucbdma *u)
+{
+       struct ucbdma *u_kaddr = uptr_to_kptr(u);
+       struct channel *c = &channel0;
+
+       printk("[kern] IOMMU = ON\n");
+       printk("[kern] ucbdma: user: %p kern: %p ndesc: %d\n", u,
+               &u_kaddr->desc, u_kaddr->ndesc);
+
+       /* perform actual DMA */
+       perform_dma(c, (physaddr_t) &u->status, (physaddr_t) &u->desc,
+                   u_kaddr->ndesc);
+       wait_for_dma_completion(&u_kaddr->status);
+       cleanup_post_copy(c);
+}
+
+/* cbdma_stats: get stats about the device and driver
+ */
+static struct sized_alloc *open_stats(void)
+{
+       struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+       uint64_t value;
+
+       sza_printf(sza,
+               "Intel CBDMA [%x:%x] registered at %02x:%02x.%x\n",
+               pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func);
+
+       /* driver info. */
+       sza_printf(sza, "    Driver Information:\n");
+       sza_printf(sza,
+               "\tmmio: %p\n"
+               "\tmmio_phy: 0x%llx\n"
+               "\tmmio_sz: %u\n"
+               "\ttotal_channels: %d\n"
+               "\tdesc_kaddr: %p\n"
+               "\tdesc_paddr: %p\n"
+               "\tdesc_num: %d\n"
+               "\tver: 0x%x\n"
+               "\tstatus_kaddr: %p\n"
+               "\tstatus_paddr: %p\n"
+               "\tstatus_value: 0x%llx\n",
+               mmio, mmio_phy, mmio_sz, chancnt,
+               channel0.pdesc, PADDR(channel0.pdesc), channel0.ndesc,
+               channel0.ver, channel0.status, PADDR(channel0.status),
+               *(uint64_t *)channel0.status);
+
+       /* print the PCI registers */
+       sza_printf(sza, "    PCIe Config Registers:\n");
+
+       value = pcidev_read16(pci, PCI_CMD_REG);
+       sza_printf(sza, "\tPCICMD: 0x%x\n", value);
+
+       value = pcidev_read16(pci, PCI_STATUS_REG);
+       sza_printf(sza, "\tPCISTS: 0x%x\n", value);
+
+       value = pcidev_read16(pci, PCI_REVID_REG);
+       sza_printf(sza, "\tRID: 0x%x\n", value);
+
+       value = pcidev_read32(pci, PCI_BAR0_STD);
+       sza_printf(sza, "\tCB_BAR: 0x%x\n", value);
+
+       value = pcidev_read16(pci, DEVSTS);
+       sza_printf(sza, "\tDEVSTS: 0x%x\n", value);
+
+       value = pcidev_read32(pci, PMCSR);
+       sza_printf(sza, "\tPMCSR: 0x%x\n", value);
+
+       value = pcidev_read32(pci, DMAUNCERRSTS);
+       sza_printf(sza, "\tDMAUNCERRSTS: 0x%x\n", value);
+
+       value = pcidev_read32(pci, DMAUNCERRMSK);
+       sza_printf(sza, "\tDMAUNCERRMSK: 0x%x\n", value);
+
+       value = pcidev_read32(pci, DMAUNCERRSEV);
+       sza_printf(sza, "\tDMAUNCERRSEV: 0x%x\n", value);
+
+       value = pcidev_read8(pci, DMAUNCERRPTR);
+       sza_printf(sza, "\tDMAUNCERRPTR: 0x%x\n", value);
+
+       value = pcidev_read8(pci, DMAGLBERRPTR);
+       sza_printf(sza, "\tDMAGLBERRPTR: 0x%x\n", value);
+
+       value = pcidev_read32(pci, CHANERR_INT);
+       sza_printf(sza, "\tCHANERR_INT: 0x%x\n", value);
+
+       value = pcidev_read32(pci, CHANERRMSK_INT);
+       sza_printf(sza, "\tCHANERRMSK_INT: 0x%x\n", value);
+
+       value = pcidev_read32(pci, CHANERRSEV_INT);
+       sza_printf(sza, "\tCHANERRSEV_INT: 0x%x\n", value);
+
+       value = pcidev_read8(pci, CHANERRPTR);
+       sza_printf(sza, "\tCHANERRPTR: 0x%x\n", value);
+
+       sza_printf(sza, "    CHANNEL_0 MMIO Registers:\n");
+
+       value = read8(mmio + CBDMA_CHANCMD_OFFSET);
+       sza_printf(sza, "\tCHANCMD: 0x%x\n", value);
+
+       value = read8(mmio + IOAT_VER_OFFSET);
+       sza_printf(sza, "\tCBVER: 0x%x major=%d minor=%d\n",
+                  value,
+                  GET_IOAT_VER_MAJOR(value),
+                  GET_IOAT_VER_MINOR(value));
+
+       value = read16(mmio + CBDMA_CHANCTRL_OFFSET);
+       sza_printf(sza, "\tCHANCTRL: 0x%llx\n", value);
+
+       value = read64(mmio + CBDMA_CHANSTS_OFFSET);
+       sza_printf(sza, "\tCHANSTS: 0x%x [%s], desc_addr: %p, raw: 0x%llx\n",
+                  (value & IOAT_CHANSTS_STATUS),
+                  cbdma_str_chansts(value),
+                  (value & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR),
+                  value);
+
+       value = read64(mmio + CBDMA_CHAINADDR_OFFSET);
+       sza_printf(sza, "\tCHAINADDR: %p\n", value);
+
+       value = read64(mmio + CBDMA_CHANCMP_OFFSET);
+       sza_printf(sza, "\tCHANCMP: %p\n", value);
+
+       value = read16(mmio + CBDMA_DMACOUNT_OFFSET);
+       sza_printf(sza, "\tDMACOUNT: %d\n", value);
+
+       value = read32(mmio + CBDMA_CHANERR_OFFSET);
+       sza_printf(sza, "\tCHANERR: 0x%x\n", value);
+
+       return sza;
+}
+
+static struct sized_alloc *open_reset(void)
+{
+       struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+
+       if (cbdma_is_reset_pending())
+               sza_printf(sza, "Status: Reset is pending\n");
+       else
+               sza_printf(sza, "Status: No pending reset\n");
+
+       sza_printf(sza, "Write '1' to perform reset!\n");
+
+       return sza;
+}
+
+static struct sized_alloc *open_iommu(void)
+{
+       struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+
+       sza_printf(sza, "IOMMU enabled = %s\n", iommu_enabled ? "yes":"no");
+       sza_printf(sza, "Write '0' to disable or '1' to enable the IOMMU\n");
+
+       return sza;
+}
+
+/* targets channel0 */
+static struct sized_alloc *open_ktest(void)
+{
+       struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+
+       /* run the test */
+       cbdma_ktest();
+
+       sza_printf(sza,
+          "Self-test Intel CBDMA [%x:%x] registered at %02x:%02x.%x\n",
+          pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func);
+
+       sza_printf(sza, "\tChannel Status: %s (raw: 0x%x)\n",
+               cbdma_str_chansts(*((uint64_t *)channel0.status)),
+               (*((uint64_t *)channel0.status) & IOAT_CHANSTS_STATUS));
+
+       sza_printf(sza, "\tCopy Size: %d (0x%x)\n", KTEST_SIZE, KTEST_SIZE);
+       sza_printf(sza, "\tsrcfill: %c (0x%x)\n", ktest.srcfill, ktest.srcfill);
+       sza_printf(sza, "\tdstfill: %c (0x%x)\n", ktest.dstfill, ktest.dstfill);
+       sza_printf(sza, "\tsrc_str (after copy): %s\n", ktest.src);
+       sza_printf(sza, "\tdst_str (after copy): %s\n", ktest.dst);
+
+       return sza;
+}
+
+/* cbdma_reset_device: resets the device; this clears the effects of any
+ * earlier programming errors
+ */
+void cbdma_reset_device(void)
+{
+       int cbdmaver;
+       uint32_t error;
+
+       /* make sure the driver is initialized */
+       if (!mmio)
+               error(EIO, "cbdma: mmio addr not set");
+
+       pcidev_write16(pci, PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                                                       | PCI_COMMAND_MASTER);
+       /* fetch version */
+       cbdmaver = read8(mmio + IOAT_VER_OFFSET);
+
+       /* ack channel errors */
+       error = read32(mmio + CBDMA_CHANERR_OFFSET);
+       write32(error, mmio + CBDMA_CHANERR_OFFSET);
+
+       if (ACCESS_PCIE_CONFIG_SPACE) {
+               /* ack PCI device-level errors */
+               /* clear DMA Cluster Uncorrectable Error Status */
+               error = pcidev_read32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET);
+               pcidev_write32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET, error);
+
+               /* clear DMA Channel Error Status */
+               error = pcidev_read32(pci, IOAT_PCI_CHANERR_INT_OFFSET);
+               pcidev_write32(pci, IOAT_PCI_CHANERR_INT_OFFSET, error);
+       }
+
+       /* reset */
+       write8(IOAT_CHANCMD_RESET, mmio
+                                  + IOAT_CHANNEL_MMIO_SIZE
+                                  + IOAT_CHANCMD_OFFSET(cbdmaver));
+
+       pcidev_write16(pci, PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                       | PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE);
+
+       printk("cbdma: reset performed\n");
+}
+
+/* cbdma_is_reset_pending: returns true if reset is pending
+ */
+bool cbdma_is_reset_pending(void)
+{
+       int cbdmaver;
+       int status;
+
+       /* make sure the driver is initialized */
+       if (!mmio) {
+               error(EPERM, "cbdma: mmio addr not set");
+               return false; /* does not reach */
+       }
+
+       /* fetch version */
+       cbdmaver = read8(mmio + IOAT_VER_OFFSET);
+
+       status = read8(mmio + IOAT_CHANNEL_MMIO_SIZE
+                       + IOAT_CHANCMD_OFFSET(cbdmaver));
+
+       return (status & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
+}
+
+/* ---------------- Sys interface ---------------- */
+
+static struct chan *cbdmaopen(struct chan *c, int omode)
+{
+       switch (c->qid.path) {
+       case Qcbdmastats:
+               c->synth_buf = open_stats();
+               break;
+       case Qcbdmareset:
+               c->synth_buf = open_reset();
+               break;
+       case Qcbdmaiommu:
+               c->synth_buf = open_iommu();
+               break;
+       case Qcbdmaktest:
+               c->synth_buf = open_ktest();
+               break;
+       case Qdir:
+       case Qcbdmaucopy:
+               break;
+       default:
+               error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+       }
+
+       return devopen(c, omode, cbdmadir, ARRAY_SIZE(cbdmadir), devgen);
+}
+
+static void cbdmaclose(struct chan *c)
+{
+       switch (c->qid.path) {
+       case Qcbdmastats:
+       case Qcbdmareset:
+       case Qcbdmaiommu:
+       case Qcbdmaktest:
+               kfree(c->synth_buf);
+               c->synth_buf = NULL;
+               break;
+       case Qdir:
+       case Qcbdmaucopy:
+               break;
+       default:
+               error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+       }
+}
+
+static size_t cbdmaread(struct chan *c, void *va, size_t n, off64_t offset)
+{
+       struct sized_alloc *sza = c->synth_buf;
+
+       switch (c->qid.path) {
+       case Qcbdmaktest:
+       case Qcbdmastats:
+       case Qcbdmareset:
+       case Qcbdmaiommu:
+               return readstr(offset, va, n, sza->buf);
+       case Qcbdmaucopy:
+               return readstr(offset, va, n,
+                       "Write address of struct ucopy to issue DMA\n");
+       case Qdir:
+               return devdirread(c, va, n, cbdmadir, ARRAY_SIZE(cbdmadir),
+                                       devgen);
+       default:
+               error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+       }
+
+       return -1;      /* not reached */
+}
+
+static void init_channel(struct channel *c, int cnum, int ndesc)
+{
+       c->number = cnum;
+       c->pdesc = NULL;
+       init_desc(c, ndesc);
+
+       /* this is a writeback field; the hardware will update this value */
+       if (c->status == 0)
+               c->status = kmalloc_align(sizeof(uint64_t), MEM_WAIT, 8);
+       assert(c->status != 0);
+
+       /* cbdma version */
+       c->ver = read8(mmio + IOAT_VER_OFFSET);
+
+       /* Set "Any Error Abort Enable": enables abort for any error
+        * encountered.
+        * Set "Error Completion Enable": enables completion write to the
+        * address in CHANCMP for any error.
+        * Reset "Interrupt Disable": W1C; when clear, enables the interrupt to
+        * fire for the next descriptor that specifies an interrupt.
+        */
+       write8(IOAT_CHANCTRL_ANY_ERR_ABORT_EN | IOAT_CHANCTRL_ERR_COMPLETION_EN,
+              get_register(c, IOAT_CHANCTRL_OFFSET));
+}
+
+static size_t cbdmawrite(struct chan *c, void *va, size_t n, off64_t offset)
+{
+       switch (c->qid.path) {
+       case Qdir:
+               error(EPERM, "writing not permitted");
+       case Qcbdmaktest:
+       case Qcbdmastats:
+               error(EPERM, ERROR_FIXME);
+       case Qcbdmareset:
+               if (offset == 0 && n > 0 && *(char *)va == '1') {
+                       cbdma_reset_device();
+                       init_channel(&channel0, 0, NDESC);
+               } else {
+                       error(EINVAL, "write '1' to perform the reset");
+               }
+               return n;
+       case Qcbdmaucopy:
+               if (offset == 0 && n > 0) {
+                       printk("[kern] value from userspace: %p\n", va);
+                       if (iommu_enabled)
+                               issue_dma_vaddr(va);
+                       else
+                               issue_dma_kaddr(va);
+                       return n;
+               }
+               return 0;
+       case Qcbdmaiommu:
+               if (offset == 0 && n > 0 && *(char *)va == '1')
+                       iommu_enabled = true;
+               else if (offset == 0 && n > 0 && *(char *)va == '0')
+                       iommu_enabled = false;
+               else
+                       error(EINVAL, "write '0' or '1'");
+               return n;
+       default:
+               error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+       }
+
+       return -1;      /* not reached */
+}
+
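+/* MSI handler: ack the channel interrupt by re-arming it via CHANCTRL so the
+ * next descriptor that requests an interrupt can fire. */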
+static void cbdma_interrupt(struct hw_trapframe *hw_tf, void *arg)
+{
+       uint16_t value;
+
+       value = read16(get_register(&channel0, IOAT_CHANCTRL_OFFSET));
+       write16(value | IOAT_CHANCTRL_INT_REARM,
+               get_register(&channel0, IOAT_CHANCTRL_OFFSET));
+}
+
+void cbdmainit(void)
+{
+       int tbdf;
+       int i;
+       int id;
+       struct pci_device *pci_iter;
+
+       /* assigning global variables */
+       pci             = NULL;
+       mmio            = NULL;
+       mmio_sz         = -1;
+
+       /* initialize cbdmadev */
+       memset(&cbdmadev, 0x0, sizeof(cbdmadev));
+
+       /* search the PCI devices for a supported IOAT function */
+       STAILQ_FOREACH(pci_iter, &pci_devices, all_dev) {
+               id = pci_iter->dev_id << 16 | pci_iter->ven_id;
+               switch (id) {
+               default:
+                       continue;
+               case ioat2021:
+               case ioat2f20:
+                       /* hack: bus 0 is the PCI_ALL iommu.
+                        * Can remove this once we add code for scoped IOMMU */
+                       if (pci_iter->bus != 0)
+                               continue;
+                       pci = pci_iter;
+                       break;
+               }
+       }
+
+       if (pci == NULL) {
+               printk("cbdma: no Intel CBDMA device found\n");
+               return;
+       }
+
+       /* search and find the mapped mmio region */
+       for (i = 0; i < COUNT_OF(pci->bar); i++) {
+               if (pci->bar[i].mmio_sz == 0)
+                       continue;
+               mmio_phy = (pci->bar[i].mmio_base32
+                        ? pci->bar[i].mmio_base32
+                        : pci->bar[i].mmio_base64);
+               mmio_sz  = pci->bar[i].mmio_sz;
+               mmio     = (void *) vmap_pmem_nocache(mmio_phy, mmio_sz);
+               break;
+       }
+
+       /* handle any errors */
+       if (mmio_sz == -1) {
+               printk("cbdma: invalid mmio_sz\n");
+               return;
+       }
+
+       if (mmio == NULL) {
+               printk("cbdma: cannot map %p\n", mmio_phy);
+               return;
+       }
+
+       /* performance related stuff */
+       pci_set_cacheline_size(pci);
+
+       /* Get the channel count. Top 3 bits of the register are reserved. */
+       chancnt = read8(mmio + IOAT_CHANCNT_OFFSET) & 0x1F;
+
+       /* initialization successful; print stats */
+       printk("cbdma: registered [%x:%x] at %02x:%02x.%x // "
+              "mmio:%p mmio_sz:%u\n",
+              pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func,
+              mmio, mmio_sz);
+
+       tbdf = MKBUS(BusPCI, pci->bus, pci->dev, pci->func);
+       register_irq(pci->irqline, cbdma_interrupt, NULL, tbdf);
+
+       /* reset device */
+       cbdma_reset_device();
+
+       /* initialize channel(s) */
+       init_channel(&channel0, 0, NDESC);
+
+       /* setup ktest struct */
+       ktest.srcfill = '1';
+       ktest.dstfill = '0';
+}
+
+struct dev cbdmadevtab __devtab = {
+       .name       = "cbdma",
+       .reset      = devreset,
+       .init       = cbdmainit,
+       .shutdown   = devshutdown,
+       .attach     = cbdmaattach,
+       .walk       = cbdmawalk,
+       .stat       = cbdmastat,
+       .open       = cbdmaopen,
+       .create     = devcreate,
+       .close      = cbdmaclose,
+       .read       = cbdmaread,
+       .bread      = devbread,
+       .write      = cbdmawrite,
+       .bwrite     = devbwrite,
+       .remove     = devremove,
+       .wstat      = devwstat,
+};
diff --git a/kern/include/cbdma_regs.h b/kern/include/cbdma_regs.h
new file mode 100644
index 0000000..6c8ec3d
--- /dev/null
@@ -0,0 +1,268 @@
+/* Copyright (c) 2019 Google Inc
+ * Aditya Basu <mitthu@google.com>
+ * See LICENSE for details.
+ *
+ * Copy of CBDMA register definitions from Linux kernel (around v5.1)
+ * drivers/dma/ioat/registers.h
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define ACCESS_PCIE_CONFIG_SPACE 1
+
+bool cbdma_is_reset_pending(void);
+void cbdma_reset_device(void);
+
+/* file: drivers/dma/ioat/hw.h */
+#define IOAT_VER_1_2            0x12    /* Version 1.2 */
+#define IOAT_VER_2_0            0x20    /* Version 2.0 */
+#define IOAT_VER_3_0            0x30    /* Version 3.0 */
+#define IOAT_VER_3_2            0x32    /* Version 3.2 */
+#define IOAT_VER_3_3            0x33    /* Version 3.3 */
+#define IOAT_VER_3_4           0x34    /* Version 3.4 */
+/* -------------------------------------- */
+
+#define IOAT_PCI_DMACTRL_OFFSET                        0x48
+#define IOAT_PCI_DMACTRL_DMA_EN                        0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN                        0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET              0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET           0x148
+#define IOAT_PCI_CHANERR_INT_OFFSET            0x180
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET                0x184
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET                    0x00    /*  8-bit */
+
+#define IOAT_XFERCAP_OFFSET                    0x01    /*  8-bit */
+#define IOAT_XFERCAP_4KB                       12
+#define IOAT_XFERCAP_8KB                       13
+#define IOAT_XFERCAP_16KB                      14
+#define IOAT_XFERCAP_32KB                      15
+#define IOAT_XFERCAP_32GB                      0
+
+#define IOAT_GENCTRL_OFFSET                    0x02    /*  8-bit */
+#define IOAT_GENCTRL_DEBUG_EN                  0x01
+
+#define IOAT_INTRCTRL_OFFSET                   0x03    /*  8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN            0x01    /* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS               0x02    /* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT                      0x04    /* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL      0x08    /* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET                 0x04    /* Each bit is a channel */
+
+#define IOAT_VER_OFFSET                                0x08    /*  8-bit */
+#define IOAT_VER_MAJOR_MASK                    0xF0
+#define IOAT_VER_MINOR_MASK                    0x0F
+#define GET_IOAT_VER_MAJOR(x)                  (((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x)                  ((x) & IOAT_VER_MINOR_MASK)
+
+#define IOAT_PERPORTOFFSET_OFFSET              0x0A    /* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET                  0x0C    /* 16-bit */
+#define IOAT_INTRDELAY_MASK                    0x3FFF  /* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT         0x8000  /* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET              0x0E    /* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE       0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED            0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS              0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING          0x0008
+
+#define IOAT_DMA_CAP_OFFSET                    0x10    /* 32-bit */
+#define IOAT_CAP_PAGE_BREAK                    0x00000001
+#define IOAT_CAP_CRC                           0x00000002
+#define IOAT_CAP_SKIP_MARKER                   0x00000004
+#define IOAT_CAP_DCA                           0x00000010
+#define IOAT_CAP_CRC_MOVE                      0x00000020
+#define IOAT_CAP_FILL_BLOCK                    0x00000040
+#define IOAT_CAP_APIC                          0x00000080
+#define IOAT_CAP_XOR                           0x00000100
+#define IOAT_CAP_PQ                            0x00000200
+#define IOAT_CAP_DWBES                         0x00002000
+#define IOAT_CAP_RAID16SS                      0x00020000
+
+#define IOAT_CHANNEL_MMIO_SIZE                 0x80    /* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET                   0x00    /* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK    0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN            0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE           0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL    0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN               0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN         0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN                0x0004
+#define IOAT_CHANCTRL_INT_REARM                        0x0001
+#define IOAT_CHANCTRL_RUN                      (IOAT_CHANCTRL_INT_REARM |\
+                                                IOAT_CHANCTRL_ERR_INT_EN |\
+                                                IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+                                                IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
+
+#define IOAT_DMA_COMP_OFFSET                   0x02    /* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1                       0x0001  /* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2                       0x0002  /* Compatibility with DMA version 2 */
+
+
+#define IOAT1_CHANSTS_OFFSET           0x04    /* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET           0x08    /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW       0x04
+#define IOAT2_CHANSTS_OFFSET_LOW       0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver)           ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH      0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH      0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver)          ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR                  0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR          0x8ULL
+#define IOAT_CHANSTS_STATUS    0x7ULL
+#define IOAT_CHANSTS_ACTIVE    0x0
+#define IOAT_CHANSTS_DONE      0x1
+#define IOAT_CHANSTS_SUSPENDED 0x2
+#define IOAT_CHANSTS_HALTED    0x3
+
+
+
+#define IOAT_CHAN_DMACOUNT_OFFSET      0x06    /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET         0x30   /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET       0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET         0x00
+#define IOAT_DCA_VER_MAJOR_MASK     0xF0
+#define IOAT_DCA_VER_MINOR_MASK     0x0F
+
+#define IOAT_DCA_COMP_OFFSET        0x02
+#define IOAT_DCA_COMP_V1            0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET  0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET  0x06
+#define IOAT_PCI_CAPABILITY_MEMWR   0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
+#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET      0x10
+#define IOAT_DCA_GREQID_SIZE        0x04
+#define IOAT_DCA_GREQID_MASK        0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
+#define IOAT_DCA_GREQID_VALID       0x20000000
+#define IOAT_DCA_GREQID_LASTID      0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET    0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH  0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET    0x0E
+#define IOAT3_PCI_CONTROL_MEMWR     0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET     0x02
+
+#define IOAT1_CHAINADDR_OFFSET         0x0C    /* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET         0x10    /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver)             ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW     0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW     0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver)         ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH    0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH    0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver)                ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET           0x14    /*  8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET           0x04    /*  8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET                     0x20
+#define IOAT_CHANCMD_RESUME                    0x10
+#define IOAT_CHANCMD_ABORT                     0x08
+#define IOAT_CHANCMD_SUSPEND                   0x04
+#define IOAT_CHANCMD_APPEND                    0x02
+#define IOAT_CHANCMD_START                     0x01
+
+#define IOAT_CHANCMP_OFFSET                    0x18    /* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW                        0x18
+#define IOAT_CHANCMP_OFFSET_HIGH               0x1C
+
+#define IOAT_CDAR_OFFSET                       0x20    /* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW                   0x20
+#define IOAT_CDAR_OFFSET_HIGH                  0x24
+
+#define IOAT_CHANERR_OFFSET                    0x28    /* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR      0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR     0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR     0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR       0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR      0x0010
+#define IOAT_CHANERR_CHANCMD_ERR               0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR  0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR      0x0080
+#define IOAT_CHANERR_READ_DATA_ERR             0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR            0x0200
+#define IOAT_CHANERR_CONTROL_ERR       0x0400
+#define IOAT_CHANERR_LENGTH_ERR        0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR       0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR     0x2000
+#define IOAT_CHANERR_SOFT_ERR                  0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR          0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR          0x10000
+#define IOAT_CHANERR_XOR_Q_ERR                 0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR      0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET               0x2C    /* 32-bit Channel Error Register */
+
+/* Extras: Added by Aditya Basu <mitthu@google.com> */
+#define CBDMA_CHANCMD_OFFSET                           0x84
+#define CBDMA_CHANSTS_OFFSET                           0x88
+#define CBDMA_CHANCTRL_OFFSET                          0x80
+#define CBDMA_DMACOUNT_OFFSET                          0x86
+#define CBDMA_CHAINADDR_OFFSET                         0x90
+#define CBDMA_CHANCMP_OFFSET                           0x98
+#define CBDMA_CHANERR_OFFSET                           0xa8
+#define CBDMA_DESC_CTRL_INTR_ON_COMPLETION             0x01 /* 32-bit field */
+#define CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION    0x08
+#define CBDMA_DESC_CTRL_NULL_DESC                      0x20
+
+#define IOAT_CHANSTS_ARMED                             0x4
+
+#endif /* _IOAT_REGISTERS_H_ */