Import mlx4 files from Linux 4.1
author     Xiao Jia <stfairy@gmail.com>
           Tue, 8 Sep 2015 23:26:58 +0000 (16:26 -0700)
committer  Barret Rhoden <brho@cs.berkeley.edu>
           Wed, 23 Sep 2015 17:49:29 +0000 (13:49 -0400)
47 files changed:
kern/drivers/net/mlx4/Kconfig [new file with mode: 0644]
kern/drivers/net/mlx4/Makefile [new file with mode: 0644]
kern/drivers/net/mlx4/alloc.c [new file with mode: 0644]
kern/drivers/net/mlx4/catas.c [new file with mode: 0644]
kern/drivers/net/mlx4/cmd.c [new file with mode: 0644]
kern/drivers/net/mlx4/cq.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_clock.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_cq.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_dcb_nl.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_ethtool.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_main.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_netdev.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_port.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_port.h [new file with mode: 0644]
kern/drivers/net/mlx4/en_resources.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_rx.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_selftest.c [new file with mode: 0644]
kern/drivers/net/mlx4/en_tx.c [new file with mode: 0644]
kern/drivers/net/mlx4/eq.c [new file with mode: 0644]
kern/drivers/net/mlx4/fw.c [new file with mode: 0644]
kern/drivers/net/mlx4/fw.h [new file with mode: 0644]
kern/drivers/net/mlx4/fw_qos.c [new file with mode: 0644]
kern/drivers/net/mlx4/fw_qos.h [new file with mode: 0644]
kern/drivers/net/mlx4/icm.c [new file with mode: 0644]
kern/drivers/net/mlx4/icm.h [new file with mode: 0644]
kern/drivers/net/mlx4/intf.c [new file with mode: 0644]
kern/drivers/net/mlx4/main.c [new file with mode: 0644]
kern/drivers/net/mlx4/mcg.c [new file with mode: 0644]
kern/drivers/net/mlx4/mlx4.h [new file with mode: 0644]
kern/drivers/net/mlx4/mlx4_en.h [new file with mode: 0644]
kern/drivers/net/mlx4/mlx4_stats.h [new file with mode: 0644]
kern/drivers/net/mlx4/mr.c [new file with mode: 0644]
kern/drivers/net/mlx4/pd.c [new file with mode: 0644]
kern/drivers/net/mlx4/port.c [new file with mode: 0644]
kern/drivers/net/mlx4/profile.c [new file with mode: 0644]
kern/drivers/net/mlx4/qp.c [new file with mode: 0644]
kern/drivers/net/mlx4/reset.c [new file with mode: 0644]
kern/drivers/net/mlx4/resource_tracker.c [new file with mode: 0644]
kern/drivers/net/mlx4/sense.c [new file with mode: 0644]
kern/drivers/net/mlx4/srq.c [new file with mode: 0644]
kern/include/linux/mlx4/cmd.h [new file with mode: 0644]
kern/include/linux/mlx4/cq.h [new file with mode: 0644]
kern/include/linux/mlx4/device.h [new file with mode: 0644]
kern/include/linux/mlx4/doorbell.h [new file with mode: 0644]
kern/include/linux/mlx4/driver.h [new file with mode: 0644]
kern/include/linux/mlx4/qp.h [new file with mode: 0644]
kern/include/linux/mlx4/srq.h [new file with mode: 0644]

diff --git a/kern/drivers/net/mlx4/Kconfig b/kern/drivers/net/mlx4/Kconfig
new file mode 100644 (file)
index 0000000..1486ce9
--- /dev/null
@@ -0,0 +1,46 @@
+#
+# Mellanox driver configuration
+#
+
+config MLX4_EN
+       tristate "Mellanox Technologies 1/10/40Gbit Ethernet support"
+       depends on PCI
+       select MLX4_CORE
+       select PTP_1588_CLOCK
+       ---help---
+         This driver supports Mellanox Technologies ConnectX Ethernet
+         devices.
+
+config MLX4_EN_DCB
+       bool "Data Center Bridging (DCB) Support"
+       default y
+       depends on MLX4_EN && DCB
+       ---help---
+         Say Y here if you want to use Data Center Bridging (DCB) in the
+         driver.
+         If set to N, you will not be able to configure QoS and rate-limit
+         attributes.  This option depends on the kernel's DCB support.
+
+         If unsure, set to Y.
+
+config MLX4_EN_VXLAN
+       bool "VXLAN offloads Support"
+       default y
+       depends on MLX4_EN && VXLAN && !(MLX4_EN=y && VXLAN=m)
+       ---help---
+         Say Y here if you want to use VXLAN offloads in the driver.
+
+config MLX4_CORE
+       tristate
+       depends on PCI
+       default n
+
+config MLX4_DEBUG
+       bool "Verbose debugging output" if (MLX4_CORE && EXPERT)
+       depends on MLX4_CORE
+       default y
+       ---help---
+         This option causes debugging code to be compiled into the
+         mlx4_core driver.  The output can be turned on via the
+         debug_level module parameter (which can also be set after
+         the driver is loaded through sysfs).
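
For reference, CONFIG_MLX4_DEBUG gates a runtime debug level rather than
extra code paths: when the option is off, the level is a compile-time
constant zero and the debug prints fold away. A minimal sketch of that
gating, assuming the macro names used by the imported mlx4.h
(mlx4_debug_level, mlx4_dbg); shown for orientation only, not quoted from
the header:

	#ifdef CONFIG_MLX4_DEBUG
	extern int mlx4_debug_level;	/* set via the debug_level module param */
	#else
	#define mlx4_debug_level	(0)	/* constant: branch below folds away */
	#endif

	/* Print only when debugging is enabled at runtime. */
	#define mlx4_dbg(mdev, format, ...)				\
	do {								\
		if (mlx4_debug_level)					\
			dev_printk(KERN_DEBUG,				\
				   &(mdev)->persist->pdev->dev,		\
				   format, ##__VA_ARGS__);		\
	} while (0)
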
diff --git a/kern/drivers/net/mlx4/Makefile b/kern/drivers/net/mlx4/Makefile
new file mode 100644 (file)
index 0000000..c82217e
--- /dev/null
@@ -0,0 +1,11 @@
+obj-$(CONFIG_MLX4_CORE)                += mlx4_core.o
+
+mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \
+               main.o mcg.o mr.o pd.o port.o profile.o qp.o reset.o sense.o \
+               srq.o resource_tracker.o
+
+obj-$(CONFIG_MLX4_EN)               += mlx4_en.o
+
+mlx4_en-y :=   en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \
+               en_resources.o en_netdev.o en_selftest.o en_clock.o
+mlx4_en-$(CONFIG_MLX4_EN_DCB) += en_dcb_nl.o
diff --git a/kern/drivers/net/mlx4/alloc.c b/kern/drivers/net/mlx4/alloc.c
new file mode 100644 (file)
index 0000000..0c51c69
--- /dev/null
@@ -0,0 +1,835 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+
+#include "mlx4.h"
+
+u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap)
+{
+       u32 obj;
+
+       spin_lock(&bitmap->lock);
+
+       obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last);
+       if (obj >= bitmap->max) {
+               bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+                               & bitmap->mask;
+               obj = find_first_zero_bit(bitmap->table, bitmap->max);
+       }
+
+       if (obj < bitmap->max) {
+               set_bit(obj, bitmap->table);
+               bitmap->last = (obj + 1);
+               if (bitmap->last == bitmap->max)
+                       bitmap->last = 0;
+               obj |= bitmap->top;
+       } else
+               obj = -1;
+
+       if (obj != -1)
+               --bitmap->avail;
+
+       spin_unlock(&bitmap->lock);
+
+       return obj;
+}
+
+void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr)
+{
+       mlx4_bitmap_free_range(bitmap, obj, 1, use_rr);
+}
+
+static unsigned long find_aligned_range(unsigned long *bitmap,
+                                       u32 start, u32 nbits,
+                                       int len, int align, u32 skip_mask)
+{
+       unsigned long end, i;
+
+again:
+       start = ALIGN(start, align);
+
+       while ((start < nbits) && (test_bit(start, bitmap) ||
+                                  (start & skip_mask)))
+               start += align;
+
+       if (start >= nbits)
+               return -1;
+
+       end = start+len;
+       if (end > nbits)
+               return -1;
+
+       for (i = start + 1; i < end; i++) {
+               if (test_bit(i, bitmap) || ((u32)i & skip_mask)) {
+                       start = i + 1;
+                       goto again;
+               }
+       }
+
+       return start;
+}
+
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt,
+                           int align, u32 skip_mask)
+{
+       u32 obj;
+
+       if (likely(cnt == 1 && align == 1 && !skip_mask))
+               return mlx4_bitmap_alloc(bitmap);
+
+       spin_lock(&bitmap->lock);
+
+       obj = find_aligned_range(bitmap->table, bitmap->last,
+                                bitmap->max, cnt, align, skip_mask);
+       if (obj >= bitmap->max) {
+               bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+                               & bitmap->mask;
+               obj = find_aligned_range(bitmap->table, 0, bitmap->max,
+                                        cnt, align, skip_mask);
+       }
+
+       if (obj < bitmap->max) {
+               bitmap_set(bitmap->table, obj, cnt);
+               if (obj == bitmap->last) {
+                       bitmap->last = (obj + cnt);
+                       if (bitmap->last >= bitmap->max)
+                               bitmap->last = 0;
+               }
+               obj |= bitmap->top;
+       } else
+               obj = -1;
+
+       if (obj != -1)
+               bitmap->avail -= cnt;
+
+       spin_unlock(&bitmap->lock);
+
+       return obj;
+}
+
+u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap)
+{
+       return bitmap->avail;
+}
+
+static u32 mlx4_bitmap_masked_value(struct mlx4_bitmap *bitmap, u32 obj)
+{
+       return obj & (bitmap->max + bitmap->reserved_top - 1);
+}
+
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
+                           int use_rr)
+{
+       obj &= bitmap->max + bitmap->reserved_top - 1;
+
+       spin_lock(&bitmap->lock);
+       if (!use_rr) {
+               bitmap->last = min(bitmap->last, obj);
+               bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+                               & bitmap->mask;
+       }
+       bitmap_clear(bitmap->table, obj, cnt);
+       bitmap->avail += cnt;
+       spin_unlock(&bitmap->lock);
+}
+
+int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
+                    u32 reserved_bot, u32 reserved_top)
+{
+       /* num must be a power of 2 */
+       if (num != roundup_pow_of_two(num))
+               return -EINVAL;
+
+       bitmap->last = 0;
+       bitmap->top  = 0;
+       bitmap->max  = num - reserved_top;
+       bitmap->mask = mask;
+       bitmap->reserved_top = reserved_top;
+       bitmap->avail = num - reserved_top - reserved_bot;
+       bitmap->effective_len = bitmap->avail;
+       spin_lock_init(&bitmap->lock);
+       bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) *
+                               sizeof (long), GFP_KERNEL);
+       if (!bitmap->table)
+               return -ENOMEM;
+
+       bitmap_set(bitmap->table, 0, reserved_bot);
+
+       return 0;
+}
+
+void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap)
+{
+       kfree(bitmap->table);
+}
+
+struct mlx4_zone_allocator {
+       struct list_head                entries;
+       struct list_head                prios;
+       u32                             last_uid;
+       u32                             mask;
+       /* protect the zone_allocator from concurrent accesses */
+       spinlock_t                      lock;
+       enum mlx4_zone_alloc_flags      flags;
+};
+
+struct mlx4_zone_entry {
+       struct list_head                list;
+       struct list_head                prio_list;
+       u32                             uid;
+       struct mlx4_zone_allocator      *allocator;
+       struct mlx4_bitmap              *bitmap;
+       int                             use_rr;
+       int                             priority;
+       int                             offset;
+       enum mlx4_zone_flags            flags;
+};
+
+struct mlx4_zone_allocator *mlx4_zone_allocator_create(enum mlx4_zone_alloc_flags flags)
+{
+       struct mlx4_zone_allocator *zones = kmalloc(sizeof(*zones), GFP_KERNEL);
+
+       if (NULL == zones)
+               return NULL;
+
+       INIT_LIST_HEAD(&zones->entries);
+       INIT_LIST_HEAD(&zones->prios);
+       spin_lock_init(&zones->lock);
+       zones->last_uid = 0;
+       zones->mask = 0;
+       zones->flags = flags;
+
+       return zones;
+}
+
+int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc,
+                     struct mlx4_bitmap *bitmap,
+                     u32 flags,
+                     int priority,
+                     int offset,
+                     u32 *puid)
+{
+       u32 mask = mlx4_bitmap_masked_value(bitmap, (u32)-1);
+       struct mlx4_zone_entry *it;
+       struct mlx4_zone_entry *zone = kmalloc(sizeof(*zone), GFP_KERNEL);
+
+       if (NULL == zone)
+               return -ENOMEM;
+
+       zone->flags = flags;
+       zone->bitmap = bitmap;
+       zone->use_rr = (flags & MLX4_ZONE_USE_RR) ? MLX4_USE_RR : 0;
+       zone->priority = priority;
+       zone->offset = offset;
+
+       spin_lock(&zone_alloc->lock);
+
+       zone->uid = zone_alloc->last_uid++;
+       zone->allocator = zone_alloc;
+
+       if (zone_alloc->mask < mask)
+               zone_alloc->mask = mask;
+
+       list_for_each_entry(it, &zone_alloc->prios, prio_list)
+               if (it->priority >= priority)
+                       break;
+
+       if (&it->prio_list == &zone_alloc->prios || it->priority > priority)
+               list_add_tail(&zone->prio_list, &it->prio_list);
+       list_add_tail(&zone->list, &it->list);
+
+       spin_unlock(&zone_alloc->lock);
+
+       *puid = zone->uid;
+
+       return 0;
+}
+
+/* Should be called under a lock */
+static int __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry)
+{
+       struct mlx4_zone_allocator *zone_alloc = entry->allocator;
+
+       if (!list_empty(&entry->prio_list)) {
+               /* Check if we need to add an alternative node to the prio list */
+               if (!list_is_last(&entry->list, &zone_alloc->entries)) {
+                       struct mlx4_zone_entry *next = list_first_entry(&entry->list,
+                                                                       typeof(*next),
+                                                                       list);
+
+                       if (next->priority == entry->priority)
+                               list_add_tail(&next->prio_list, &entry->prio_list);
+               }
+
+               list_del(&entry->prio_list);
+       }
+
+       list_del(&entry->list);
+
+       if (zone_alloc->flags & MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP) {
+               u32 mask = 0;
+               struct mlx4_zone_entry *it;
+
+               list_for_each_entry(it, &zone_alloc->prios, prio_list) {
+                       u32 cur_mask = mlx4_bitmap_masked_value(it->bitmap, (u32)-1);
+
+                       if (mask < cur_mask)
+                               mask = cur_mask;
+               }
+               zone_alloc->mask = mask;
+       }
+
+       return 0;
+}
+
+void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc)
+{
+       struct mlx4_zone_entry *zone, *tmp;
+
+       spin_lock(&zone_alloc->lock);
+
+       list_for_each_entry_safe(zone, tmp, &zone_alloc->entries, list) {
+               list_del(&zone->list);
+               list_del(&zone->prio_list);
+               kfree(zone);
+       }
+
+       spin_unlock(&zone_alloc->lock);
+       kfree(zone_alloc);
+}
+
+/* Should be called under a lock */
+static u32 __mlx4_alloc_from_zone(struct mlx4_zone_entry *zone, int count,
+                                 int align, u32 skip_mask, u32 *puid)
+{
+       u32 uid;
+       u32 res;
+       struct mlx4_zone_allocator *zone_alloc = zone->allocator;
+       struct mlx4_zone_entry *curr_node;
+
+       res = mlx4_bitmap_alloc_range(zone->bitmap, count,
+                                     align, skip_mask);
+
+       if (res != (u32)-1) {
+               res += zone->offset;
+               uid = zone->uid;
+               goto out;
+       }
+
+       list_for_each_entry(curr_node, &zone_alloc->prios, prio_list) {
+               if (unlikely(curr_node->priority == zone->priority))
+                       break;
+       }
+
+       if (zone->flags & MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO) {
+               struct mlx4_zone_entry *it = curr_node;
+
+               list_for_each_entry_continue_reverse(it, &zone_alloc->entries, list) {
+                       res = mlx4_bitmap_alloc_range(it->bitmap, count,
+                                                     align, skip_mask);
+                       if (res != (u32)-1) {
+                               res += it->offset;
+                               uid = it->uid;
+                               goto out;
+                       }
+               }
+       }
+
+       if (zone->flags & MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO) {
+               struct mlx4_zone_entry *it = curr_node;
+
+               list_for_each_entry_from(it, &zone_alloc->entries, list) {
+                       if (unlikely(it == zone))
+                               continue;
+
+                       if (unlikely(it->priority != curr_node->priority))
+                               break;
+
+                       res = mlx4_bitmap_alloc_range(it->bitmap, count,
+                                                     align, skip_mask);
+                       if (res != (u32)-1) {
+                               res += it->offset;
+                               uid = it->uid;
+                               goto out;
+                       }
+               }
+       }
+
+       if (zone->flags & MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO) {
+               if (list_is_last(&curr_node->prio_list, &zone_alloc->prios))
+                       goto out;
+
+               curr_node = list_first_entry(&curr_node->prio_list,
+                                            typeof(*curr_node),
+                                            prio_list);
+
+               list_for_each_entry_from(curr_node, &zone_alloc->entries, list) {
+                       res = mlx4_bitmap_alloc_range(curr_node->bitmap, count,
+                                                     align, skip_mask);
+                       if (res != (u32)-1) {
+                               res += curr_node->offset;
+                               uid = curr_node->uid;
+                               goto out;
+                       }
+               }
+       }
+
+out:
+       if (NULL != puid && res != (u32)-1)
+               *puid = uid;
+       return res;
+}
+
+/* Should be called under a lock */
+static void __mlx4_free_from_zone(struct mlx4_zone_entry *zone, u32 obj,
+                                 u32 count)
+{
+       mlx4_bitmap_free_range(zone->bitmap, obj - zone->offset, count, zone->use_rr);
+}
+
+/* Should be called under a lock */
+static struct mlx4_zone_entry *__mlx4_find_zone_by_uid(
+               struct mlx4_zone_allocator *zones, u32 uid)
+{
+       struct mlx4_zone_entry *zone;
+
+       list_for_each_entry(zone, &zones->entries, list) {
+               if (zone->uid == uid)
+                       return zone;
+       }
+
+       return NULL;
+}
+
+struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 uid)
+{
+       struct mlx4_zone_entry *zone;
+       struct mlx4_bitmap *bitmap;
+
+       spin_lock(&zones->lock);
+
+       zone = __mlx4_find_zone_by_uid(zones, uid);
+
+       bitmap = zone == NULL ? NULL : zone->bitmap;
+
+       spin_unlock(&zones->lock);
+
+       return bitmap;
+}
+
+int mlx4_zone_remove_one(struct mlx4_zone_allocator *zones, u32 uid)
+{
+       struct mlx4_zone_entry *zone;
+       int res;
+
+       spin_lock(&zones->lock);
+
+       zone = __mlx4_find_zone_by_uid(zones, uid);
+
+       if (NULL == zone) {
+               res = -1;
+               goto out;
+       }
+
+       res = __mlx4_zone_remove_one_entry(zone);
+
+out:
+       spin_unlock(&zones->lock);
+       kfree(zone);
+
+       return res;
+}
+
+/* Should be called under a lock */
+static struct mlx4_zone_entry *__mlx4_find_zone_by_uid_unique(
+               struct mlx4_zone_allocator *zones, u32 obj)
+{
+       struct mlx4_zone_entry *zone, *zone_candidate = NULL;
+       u32 dist = (u32)-1;
+
+       /* Search for the smallest zone that this obj could be
+        * allocated from. This is done in order to handle
+        * situations when small bitmaps are allocated from bigger
+        * bitmaps (and the allocated space is marked as reserved in
+        * the bigger bitmap).
+        */
+       list_for_each_entry(zone, &zones->entries, list) {
+               if (obj >= zone->offset) {
+                       u32 mobj = (obj - zone->offset) & zones->mask;
+
+                       if (mobj < zone->bitmap->max) {
+                               u32 curr_dist = zone->bitmap->effective_len;
+
+                               if (curr_dist < dist) {
+                                       dist = curr_dist;
+                                       zone_candidate = zone;
+                               }
+                       }
+               }
+       }
+
+       return zone_candidate;
+}
+
+u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count,
+                           int align, u32 skip_mask, u32 *puid)
+{
+       struct mlx4_zone_entry *zone;
+       int res = -1;
+
+       spin_lock(&zones->lock);
+
+       zone = __mlx4_find_zone_by_uid(zones, uid);
+
+       if (NULL == zone)
+               goto out;
+
+       res = __mlx4_alloc_from_zone(zone, count, align, skip_mask, puid);
+
+out:
+       spin_unlock(&zones->lock);
+
+       return res;
+}
+
+u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count)
+{
+       struct mlx4_zone_entry *zone;
+       int res = 0;
+
+       spin_lock(&zones->lock);
+
+       zone = __mlx4_find_zone_by_uid(zones, uid);
+
+       if (NULL == zone) {
+               res = -1;
+               goto out;
+       }
+
+       __mlx4_free_from_zone(zone, obj, count);
+
+out:
+       spin_unlock(&zones->lock);
+
+       return res;
+}
+
+u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count)
+{
+       struct mlx4_zone_entry *zone;
+       int res;
+
+       if (!(zones->flags & MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP))
+               return -EFAULT;
+
+       spin_lock(&zones->lock);
+
+       zone = __mlx4_find_zone_by_uid_unique(zones, obj);
+
+       if (NULL == zone) {
+               res = -1;
+               goto out;
+       }
+
+       __mlx4_free_from_zone(zone, obj, count);
+       res = 0;
+
+out:
+       spin_unlock(&zones->lock);
+
+       return res;
+}
+/*
+ * Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0.  If the
+ * requested size is > max_direct, we split the allocation into
+ * multiple pages, so we don't require too much contiguous memory.
+ */
+
+int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
+                  struct mlx4_buf *buf, gfp_t gfp)
+{
+       dma_addr_t t;
+
+       if (size <= max_direct) {
+               buf->nbufs        = 1;
+               buf->npages       = 1;
+               buf->page_shift   = get_order(size) + PAGE_SHIFT;
+               buf->direct.buf   = dma_alloc_coherent(&dev->persist->pdev->dev,
+                                                      size, &t, gfp);
+               if (!buf->direct.buf)
+                       return -ENOMEM;
+
+               buf->direct.map = t;
+
+               while (t & ((1 << buf->page_shift) - 1)) {
+                       --buf->page_shift;
+                       buf->npages *= 2;
+               }
+
+               memset(buf->direct.buf, 0, size);
+       } else {
+               int i;
+
+               buf->direct.buf  = NULL;
+               buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+               buf->npages      = buf->nbufs;
+               buf->page_shift  = PAGE_SHIFT;
+               buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
+                                          gfp);
+               if (!buf->page_list)
+                       return -ENOMEM;
+
+               for (i = 0; i < buf->nbufs; ++i) {
+                       buf->page_list[i].buf =
+                               dma_alloc_coherent(&dev->persist->pdev->dev,
+                                                  PAGE_SIZE,
+                                                  &t, gfp);
+                       if (!buf->page_list[i].buf)
+                               goto err_free;
+
+                       buf->page_list[i].map = t;
+
+                       memset(buf->page_list[i].buf, 0, PAGE_SIZE);
+               }
+
+               if (BITS_PER_LONG == 64) {
+                       struct page **pages;
+                       pages = kmalloc(sizeof *pages * buf->nbufs, gfp);
+                       if (!pages)
+                               goto err_free;
+                       for (i = 0; i < buf->nbufs; ++i)
+                               pages[i] = virt_to_page(buf->page_list[i].buf);
+                       buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
+                       kfree(pages);
+                       if (!buf->direct.buf)
+                               goto err_free;
+               }
+       }
+
+       return 0;
+
+err_free:
+       mlx4_buf_free(dev, size, buf);
+
+       return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(mlx4_buf_alloc);
+
+void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
+{
+       int i;
+
+       if (buf->nbufs == 1)
+               dma_free_coherent(&dev->persist->pdev->dev, size,
+                                 buf->direct.buf,
+                                 buf->direct.map);
+       else {
+               if (BITS_PER_LONG == 64)
+                       vunmap(buf->direct.buf);
+
+               for (i = 0; i < buf->nbufs; ++i)
+                       if (buf->page_list[i].buf)
+                               dma_free_coherent(&dev->persist->pdev->dev,
+                                                 PAGE_SIZE,
+                                                 buf->page_list[i].buf,
+                                                 buf->page_list[i].map);
+               kfree(buf->page_list);
+       }
+}
+EXPORT_SYMBOL_GPL(mlx4_buf_free);
+
+static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device,
+                                                gfp_t gfp)
+{
+       struct mlx4_db_pgdir *pgdir;
+
+       pgdir = kzalloc(sizeof *pgdir, gfp);
+       if (!pgdir)
+               return NULL;
+
+       bitmap_fill(pgdir->order1, MLX4_DB_PER_PAGE / 2);
+       pgdir->bits[0] = pgdir->order0;
+       pgdir->bits[1] = pgdir->order1;
+       pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
+                                           &pgdir->db_dma, gfp);
+       if (!pgdir->db_page) {
+               kfree(pgdir);
+               return NULL;
+       }
+
+       return pgdir;
+}
+
+static int mlx4_alloc_db_from_pgdir(struct mlx4_db_pgdir *pgdir,
+                                   struct mlx4_db *db, int order)
+{
+       int o;
+       int i;
+
+       for (o = order; o <= 1; ++o) {
+               i = find_first_bit(pgdir->bits[o], MLX4_DB_PER_PAGE >> o);
+               if (i < MLX4_DB_PER_PAGE >> o)
+                       goto found;
+       }
+
+       return -ENOMEM;
+
+found:
+       clear_bit(i, pgdir->bits[o]);
+
+       i <<= o;
+
+       if (o > order)
+               set_bit(i ^ 1, pgdir->bits[order]);
+
+       db->u.pgdir = pgdir;
+       db->index   = i;
+       db->db      = pgdir->db_page + db->index;
+       db->dma     = pgdir->db_dma  + db->index * 4;
+       db->order   = order;
+
+       return 0;
+}
+
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_db_pgdir *pgdir;
+       int ret = 0;
+
+       mutex_lock(&priv->pgdir_mutex);
+
+       list_for_each_entry(pgdir, &priv->pgdir_list, list)
+               if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
+                       goto out;
+
+       pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp);
+       if (!pgdir) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       list_add(&pgdir->list, &priv->pgdir_list);
+
+       /* This should never fail -- we just allocated an empty page: */
+       WARN_ON(mlx4_alloc_db_from_pgdir(pgdir, db, order));
+
+out:
+       mutex_unlock(&priv->pgdir_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_db_alloc);
+
+void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int o;
+       int i;
+
+       mutex_lock(&priv->pgdir_mutex);
+
+       o = db->order;
+       i = db->index;
+
+       if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
+               clear_bit(i ^ 1, db->u.pgdir->order0);
+               ++o;
+       }
+       i >>= o;
+       set_bit(i, db->u.pgdir->bits[o]);
+
+       if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) {
+               dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
+                                 db->u.pgdir->db_page, db->u.pgdir->db_dma);
+               list_del(&db->u.pgdir->list);
+               kfree(db->u.pgdir);
+       }
+
+       mutex_unlock(&priv->pgdir_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx4_db_free);
+
+int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
+                      int size, int max_direct)
+{
+       int err;
+
+       err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL);
+       if (err)
+               return err;
+
+       *wqres->db.db = 0;
+
+       err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL);
+       if (err)
+               goto err_db;
+
+       err = mlx4_mtt_init(dev, wqres->buf.npages, wqres->buf.page_shift,
+                           &wqres->mtt);
+       if (err)
+               goto err_buf;
+
+       err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL);
+       if (err)
+               goto err_mtt;
+
+       return 0;
+
+err_mtt:
+       mlx4_mtt_cleanup(dev, &wqres->mtt);
+err_buf:
+       mlx4_buf_free(dev, size, &wqres->buf);
+err_db:
+       mlx4_db_free(dev, &wqres->db);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_alloc_hwq_res);
+
+void mlx4_free_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
+                      int size)
+{
+       mlx4_mtt_cleanup(dev, &wqres->mtt);
+       mlx4_buf_free(dev, size, &wqres->buf);
+       mlx4_db_free(dev, &wqres->db);
+}
+EXPORT_SYMBOL_GPL(mlx4_free_hwq_res);
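
The comment above mlx4_buf_alloc() summarizes the strategy: allocations up
to max_direct are physically contiguous, larger ones are split into
PAGE_SIZE chunks, and on 64-bit kernels vmap() stitches the chunks into one
virtually contiguous view. A minimal usage sketch built only from the
functions exported above; the 16 KB size and the PAGE_SIZE threshold are
illustrative, and dev is assumed to be an already-initialized struct
mlx4_dev:

	struct mlx4_buf buf;
	int err;

	/* 16 KB with max_direct == PAGE_SIZE exercises the paged path. */
	err = mlx4_buf_alloc(dev, 16 * 1024 /* size */,
			     PAGE_SIZE /* max_direct */, &buf, GFP_KERNEL);
	if (err)
		return err;

	/* buf.direct.buf is usable even in the paged case on 64-bit
	 * kernels, thanks to the vmap() call in mlx4_buf_alloc(). */

	mlx4_buf_free(dev, 16 * 1024, &buf);
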
diff --git a/kern/drivers/net/mlx4/catas.c b/kern/drivers/net/mlx4/catas.c
new file mode 100644 (file)
index 0000000..715de8a
--- /dev/null
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/workqueue.h>
+#include <linux/module.h>
+
+#include "mlx4.h"
+
+enum {
+       MLX4_CATAS_POLL_INTERVAL        = 5 * HZ,
+};
+
+
+
+int mlx4_internal_err_reset = 1;
+module_param_named(internal_err_reset, mlx4_internal_err_reset,  int, 0644);
+MODULE_PARM_DESC(internal_err_reset,
+                "Reset device on internal errors if non-zero (default 1)");
+
+static int read_vendor_id(struct mlx4_dev *dev)
+{
+       u16 vendor_id = 0;
+       int ret;
+
+       ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id);
+       if (ret) {
+               mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret);
+               return ret;
+       }
+
+       if (vendor_id == 0xffff) {
+               mlx4_err(dev, "PCI can't be accessed to read vendor id\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int mlx4_reset_master(struct mlx4_dev *dev)
+{
+       int err = 0;
+
+       if (mlx4_is_master(dev))
+               mlx4_report_internal_err_comm_event(dev);
+
+       if (!pci_channel_offline(dev->persist->pdev)) {
+               err = read_vendor_id(dev);
+               /* If the PCI device can't be accessed to read the vendor ID,
+                * we assume its link was disabled and the chip was already reset.
+                */
+               if (err)
+                       return 0;
+
+               err = mlx4_reset(dev);
+               if (err)
+                       mlx4_err(dev, "Fail to reset HCA\n");
+       }
+
+       return err;
+}
+
+static int mlx4_reset_slave(struct mlx4_dev *dev)
+{
+#define COM_CHAN_RST_REQ_OFFSET 0x10
+#define COM_CHAN_RST_ACK_OFFSET 0x08
+
+       u32 comm_flags;
+       u32 rst_req;
+       u32 rst_ack;
+       unsigned long end;
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       if (pci_channel_offline(dev->persist->pdev))
+               return 0;
+
+       comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+                                 MLX4_COMM_CHAN_FLAGS));
+       if (comm_flags == 0xffffffff) {
+               mlx4_err(dev, "VF reset is not needed\n");
+               return 0;
+       }
+
+       if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) {
+               mlx4_err(dev, "VF reset is not supported\n");
+               return -EOPNOTSUPP;
+       }
+
+       rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
+               COM_CHAN_RST_REQ_OFFSET;
+       rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
+               COM_CHAN_RST_ACK_OFFSET;
+       if (rst_req != rst_ack) {
+               mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n");
+               return -EIO;
+       }
+
+       rst_req ^= 1;
+       mlx4_warn(dev, "VF is sending reset request to Firmware\n");
+       comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET;
+       __raw_writel((__force u32)cpu_to_be32(comm_flags),
+                    (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS);
+       /* Make sure that our comm channel write doesn't
+        * get mixed in with writes from another CPU.
+        */
+       mmiowb();
+
+       end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies;
+       while (time_before(jiffies, end)) {
+               comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+                                         MLX4_COMM_CHAN_FLAGS));
+               rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
+                       COM_CHAN_RST_ACK_OFFSET;
+
+               /* Reading rst_req again since the communication channel can
+                * be reset at any time by the PF and all its bits will be
+                * set to zero.
+                */
+               rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
+                       COM_CHAN_RST_REQ_OFFSET;
+
+               if (rst_ack == rst_req) {
+                       mlx4_warn(dev, "VF Reset succeed\n");
+                       return 0;
+               }
+               cond_resched();
+       }
+       mlx4_err(dev, "Fail to send reset over the communication channel\n");
+       return -ETIMEDOUT;
+}
+
+static int mlx4_comm_internal_err(u32 slave_read)
+{
+       return (u32)COMM_CHAN_EVENT_INTERNAL_ERR ==
+               (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0;
+}
+
+void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
+{
+       int err;
+       struct mlx4_dev *dev;
+
+       if (!mlx4_internal_err_reset)
+               return;
+
+       mutex_lock(&persist->device_state_mutex);
+       if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+               goto out;
+
+       dev = persist->dev;
+       mlx4_err(dev, "device is going to be reset\n");
+       if (mlx4_is_slave(dev))
+               err = mlx4_reset_slave(dev);
+       else
+               err = mlx4_reset_master(dev);
+       BUG_ON(err != 0);
+
+       dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
+       mlx4_err(dev, "device was reset successfully\n");
+       mutex_unlock(&persist->device_state_mutex);
+
+       /* At that step HW was already reset, now notify clients */
+       mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
+       mlx4_cmd_wake_completions(dev);
+       return;
+
+out:
+       mutex_unlock(&persist->device_state_mutex);
+}
+
+static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist)
+{
+       int err = 0;
+
+       mlx4_enter_error_state(persist);
+       mutex_lock(&persist->interface_state_mutex);
+       if (persist->interface_state & MLX4_INTERFACE_STATE_UP &&
+           !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) {
+               err = mlx4_restart_one(persist->pdev);
+               mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n",
+                         err);
+       }
+       mutex_unlock(&persist->interface_state_mutex);
+}
+
+static void dump_err_buf(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       int i;
+
+       mlx4_err(dev, "Internal error detected:\n");
+       for (i = 0; i < priv->fw.catas_size; ++i)
+               mlx4_err(dev, "  buf[%02x]: %08x\n",
+                        i, swab32(readl(priv->catas_err.map + i)));
+}
+
+static void poll_catas(unsigned long dev_ptr)
+{
+       struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       u32 slave_read;
+
+       if (mlx4_is_slave(dev)) {
+               slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
+               if (mlx4_comm_internal_err(slave_read)) {
+                       mlx4_warn(dev, "Internal error detected on the communication channel\n");
+                       goto internal_err;
+               }
+       } else if (readl(priv->catas_err.map)) {
+               dump_err_buf(dev);
+               goto internal_err;
+       }
+
+       if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+               mlx4_warn(dev, "Internal error mark was detected on device\n");
+               goto internal_err;
+       }
+
+       mod_timer(&priv->catas_err.timer,
+                 round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
+       return;
+
+internal_err:
+       if (mlx4_internal_err_reset)
+               queue_work(dev->persist->catas_wq, &dev->persist->catas_work);
+}
+
+static void catas_reset(struct work_struct *work)
+{
+       struct mlx4_dev_persistent *persist =
+               container_of(work, struct mlx4_dev_persistent,
+                            catas_work);
+
+       mlx4_handle_error_state(persist);
+}
+
+void mlx4_start_catas_poll(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       phys_addr_t addr;
+
+       INIT_LIST_HEAD(&priv->catas_err.list);
+       init_timer(&priv->catas_err.timer);
+       priv->catas_err.map = NULL;
+
+       if (!mlx4_is_slave(dev)) {
+               addr = pci_resource_start(dev->persist->pdev,
+                                         priv->fw.catas_bar) +
+                                         priv->fw.catas_offset;
+
+               priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
+               if (!priv->catas_err.map) {
+                       mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
+                                 (unsigned long long)addr);
+                       return;
+               }
+       }
+
+       priv->catas_err.timer.data     = (unsigned long) dev;
+       priv->catas_err.timer.function = poll_catas;
+       priv->catas_err.timer.expires  =
+               round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
+       add_timer(&priv->catas_err.timer);
+}
+
+void mlx4_stop_catas_poll(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       del_timer_sync(&priv->catas_err.timer);
+
+       if (priv->catas_err.map) {
+               iounmap(priv->catas_err.map);
+               priv->catas_err.map = NULL;
+       }
+
+       if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION)
+               flush_workqueue(dev->persist->catas_wq);
+}
+
+int  mlx4_catas_init(struct mlx4_dev *dev)
+{
+       INIT_WORK(&dev->persist->catas_work, catas_reset);
+       dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health");
+       if (!dev->persist->catas_wq)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void mlx4_catas_end(struct mlx4_dev *dev)
+{
+       if (dev->persist->catas_wq) {
+               destroy_workqueue(dev->persist->catas_wq);
+               dev->persist->catas_wq = NULL;
+       }
+}
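
catas.c pairs a self-rearming timer with a dedicated single-threaded
workqueue: poll_catas() runs in timer (atomic) context and only inspects
state, while the actual recovery, which sleeps and takes mutexes, is
deferred to catas_wq. A stripped-down sketch of that pattern using the same
4.1-era timer API; hw_error_detected() is a hypothetical stand-in for the
real error checks:

	#include <linux/timer.h>
	#include <linux/workqueue.h>

	static struct timer_list poll_timer;
	static struct work_struct recover_work;
	static struct workqueue_struct *wq;	/* single-threaded, like catas_wq */

	static void recover(struct work_struct *work)
	{
		/* Process context: safe to sleep, take mutexes, reset the device. */
	}

	static void poll_fn(unsigned long data)
	{
		if (hw_error_detected())		/* hypothetical check */
			queue_work(wq, &recover_work);	/* defer the heavy work */
		else
			mod_timer(&poll_timer,		/* no error: re-arm */
				  round_jiffies(jiffies + 5 * HZ));
	}

	static int start_polling(void)
	{
		wq = create_singlethread_workqueue("recover");
		if (!wq)
			return -ENOMEM;
		INIT_WORK(&recover_work, recover);
		init_timer(&poll_timer);
		poll_timer.function = poll_fn;
		poll_timer.expires  = round_jiffies(jiffies + 5 * HZ);
		add_timer(&poll_timer);
		return 0;
	}
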
diff --git a/kern/drivers/net/mlx4/cmd.c b/kern/drivers/net/mlx4/cmd.c
new file mode 100644 (file)
index 0000000..529ef05
--- /dev/null
@@ -0,0 +1,3212 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/device.h>
+#include <linux/semaphore.h>
+#include <rdma/ib_smi.h>
+#include <linux/delay.h>
+
+#include <asm/io.h>
+
+#include "mlx4.h"
+#include "fw.h"
+#include "fw_qos.h"
+
+#define CMD_POLL_TOKEN 0xffff
+#define INBOX_MASK     0xffffffffffffff00ULL
+
+#define CMD_CHAN_VER 1
+#define CMD_CHAN_IF_REV 1
+
+enum {
+       /* command completed successfully: */
+       CMD_STAT_OK             = 0x00,
+       /* Internal error (such as a bus error) occurred while processing command: */
+       CMD_STAT_INTERNAL_ERR   = 0x01,
+       /* Operation/command not supported or opcode modifier not supported: */
+       CMD_STAT_BAD_OP         = 0x02,
+       /* Parameter not supported or parameter out of range: */
+       CMD_STAT_BAD_PARAM      = 0x03,
+       /* System not enabled or bad system state: */
+       CMD_STAT_BAD_SYS_STATE  = 0x04,
+       /* Attempt to access a reserved or unallocated resource: */
+       CMD_STAT_BAD_RESOURCE   = 0x05,
+       /* Requested resource is currently executing a command, or is otherwise busy: */
+       CMD_STAT_RESOURCE_BUSY  = 0x06,
+       /* Required capability exceeds device limits: */
+       CMD_STAT_EXCEED_LIM     = 0x08,
+       /* Resource is not in the appropriate state or ownership: */
+       CMD_STAT_BAD_RES_STATE  = 0x09,
+       /* Index out of range: */
+       CMD_STAT_BAD_INDEX      = 0x0a,
+       /* FW image corrupted: */
+       CMD_STAT_BAD_NVMEM      = 0x0b,
+       /* Error in ICM mapping (e.g. not enough auxiliary ICM pages to execute command): */
+       CMD_STAT_ICM_ERROR      = 0x0c,
+       /* Attempt to modify a QP/EE which is not in the presumed state: */
+       CMD_STAT_BAD_QP_STATE   = 0x10,
+       /* Bad segment parameters (Address/Size): */
+       CMD_STAT_BAD_SEG_PARAM  = 0x20,
+       /* Memory Region has Memory Windows bound to it: */
+       CMD_STAT_REG_BOUND      = 0x21,
+       /* HCA local attached memory not present: */
+       CMD_STAT_LAM_NOT_PRE    = 0x22,
+       /* Bad management packet (silently discarded): */
+       CMD_STAT_BAD_PKT        = 0x30,
+       /* More outstanding CQEs in CQ than new CQ size: */
+       CMD_STAT_BAD_SIZE       = 0x40,
+       /* Multi Function device support required: */
+       CMD_STAT_MULTI_FUNC_REQ = 0x50,
+};
+
+enum {
+       HCR_IN_PARAM_OFFSET     = 0x00,
+       HCR_IN_MODIFIER_OFFSET  = 0x08,
+       HCR_OUT_PARAM_OFFSET    = 0x0c,
+       HCR_TOKEN_OFFSET        = 0x14,
+       HCR_STATUS_OFFSET       = 0x18,
+
+       HCR_OPMOD_SHIFT         = 12,
+       HCR_T_BIT               = 21,
+       HCR_E_BIT               = 22,
+       HCR_GO_BIT              = 23
+};
+
+enum {
+       GO_BIT_TIMEOUT_MSECS    = 10000
+};
+
+enum mlx4_vlan_transition {
+       MLX4_VLAN_TRANSITION_VST_VST = 0,
+       MLX4_VLAN_TRANSITION_VST_VGT = 1,
+       MLX4_VLAN_TRANSITION_VGT_VST = 2,
+       MLX4_VLAN_TRANSITION_VGT_VGT = 3,
+};
+
+
+struct mlx4_cmd_context {
+       struct completion       done;
+       int                     result;
+       int                     next;
+       u64                     out_param;
+       u16                     token;
+       u8                      fw_status;
+};
+
+static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
+                                   struct mlx4_vhcr_cmd *in_vhcr);
+
+static int mlx4_status_to_errno(u8 status)
+{
+       static const int trans_table[] = {
+               [CMD_STAT_INTERNAL_ERR]   = -EIO,
+               [CMD_STAT_BAD_OP]         = -EPERM,
+               [CMD_STAT_BAD_PARAM]      = -EINVAL,
+               [CMD_STAT_BAD_SYS_STATE]  = -ENXIO,
+               [CMD_STAT_BAD_RESOURCE]   = -EBADF,
+               [CMD_STAT_RESOURCE_BUSY]  = -EBUSY,
+               [CMD_STAT_EXCEED_LIM]     = -ENOMEM,
+               [CMD_STAT_BAD_RES_STATE]  = -EBADF,
+               [CMD_STAT_BAD_INDEX]      = -EBADF,
+               [CMD_STAT_BAD_NVMEM]      = -EFAULT,
+               [CMD_STAT_ICM_ERROR]      = -ENFILE,
+               [CMD_STAT_BAD_QP_STATE]   = -EINVAL,
+               [CMD_STAT_BAD_SEG_PARAM]  = -EFAULT,
+               [CMD_STAT_REG_BOUND]      = -EBUSY,
+               [CMD_STAT_LAM_NOT_PRE]    = -EAGAIN,
+               [CMD_STAT_BAD_PKT]        = -EINVAL,
+               [CMD_STAT_BAD_SIZE]       = -ENOMEM,
+               [CMD_STAT_MULTI_FUNC_REQ] = -EACCES,
+       };
+
+       if (status >= ARRAY_SIZE(trans_table) ||
+           (status != CMD_STAT_OK && trans_table[status] == 0))
+               return -EIO;
+
+       return trans_table[status];
+}
+
+static u8 mlx4_errno_to_status(int errno)
+{
+       switch (errno) {
+       case -EPERM:
+               return CMD_STAT_BAD_OP;
+       case -EINVAL:
+               return CMD_STAT_BAD_PARAM;
+       case -ENXIO:
+               return CMD_STAT_BAD_SYS_STATE;
+       case -EBUSY:
+               return CMD_STAT_RESOURCE_BUSY;
+       case -ENOMEM:
+               return CMD_STAT_EXCEED_LIM;
+       case -ENFILE:
+               return CMD_STAT_ICM_ERROR;
+       default:
+               return CMD_STAT_INTERNAL_ERR;
+       }
+}
+
+static int mlx4_internal_err_ret_value(struct mlx4_dev *dev, u16 op,
+                                      u8 op_modifier)
+{
+       switch (op) {
+       case MLX4_CMD_UNMAP_ICM:
+       case MLX4_CMD_UNMAP_ICM_AUX:
+       case MLX4_CMD_UNMAP_FA:
+       case MLX4_CMD_2RST_QP:
+       case MLX4_CMD_HW2SW_EQ:
+       case MLX4_CMD_HW2SW_CQ:
+       case MLX4_CMD_HW2SW_SRQ:
+       case MLX4_CMD_HW2SW_MPT:
+       case MLX4_CMD_CLOSE_HCA:
+       case MLX4_QP_FLOW_STEERING_DETACH:
+       case MLX4_CMD_FREE_RES:
+       case MLX4_CMD_CLOSE_PORT:
+               return CMD_STAT_OK;
+
+       case MLX4_CMD_QP_ATTACH:
+               /* In the detach case, return success */
+               if (op_modifier == 0)
+                       return CMD_STAT_OK;
+               return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+
+       default:
+               return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+       }
+}
+
+static int mlx4_closing_cmd_fatal_error(u16 op, u8 fw_status)
+{
+       /* Any error during the closing commands below is considered fatal */
+       if (op == MLX4_CMD_CLOSE_HCA ||
+           op == MLX4_CMD_HW2SW_EQ ||
+           op == MLX4_CMD_HW2SW_CQ ||
+           op == MLX4_CMD_2RST_QP ||
+           op == MLX4_CMD_HW2SW_SRQ ||
+           op == MLX4_CMD_SYNC_TPT ||
+           op == MLX4_CMD_UNMAP_ICM ||
+           op == MLX4_CMD_UNMAP_ICM_AUX ||
+           op == MLX4_CMD_UNMAP_FA)
+               return 1;
+       /* Error on MLX4_CMD_HW2SW_MPT is fatal except when the fw status
+        * equals CMD_STAT_REG_BOUND.  This status indicates that the memory
+        * region has memory windows bound to it, which may result from
+        * invalid user space usage and is not fatal.
+        */
+       if (op == MLX4_CMD_HW2SW_MPT && fw_status != CMD_STAT_REG_BOUND)
+               return 1;
+       return 0;
+}
+
+static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier,
+                              int err)
+{
+       /* Only if the reset flow is really active is the return code based
+        * on the command; otherwise the current error code is returned.
+        */
+       if (mlx4_internal_err_reset) {
+               mlx4_enter_error_state(dev->persist);
+               err = mlx4_internal_err_ret_value(dev, op, op_modifier);
+       }
+
+       return err;
+}
+
+static int comm_pending(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       u32 status = readl(&priv->mfunc.comm->slave_read);
+
+       return (swab32(status) >> 31) != priv->cmd.comm_toggle;
+}
+
+static int mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       u32 val;
+
+       /* To avoid writing to unknown addresses after the device state was
+        * changed to internal error and the function was reset,
+        * check the INTERNAL_ERROR flag which is updated under
+        * device_state_mutex lock.
+        */
+       mutex_lock(&dev->persist->device_state_mutex);
+
+       if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+               mutex_unlock(&dev->persist->device_state_mutex);
+               return -EIO;
+       }
+
+       priv->cmd.comm_toggle ^= 1;
+       val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31);
+       __raw_writel((__force u32) cpu_to_be32(val),
+                    &priv->mfunc.comm->slave_write);
+       mmiowb();
+       mutex_unlock(&dev->persist->device_state_mutex);
+       return 0;
+}
+
+static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param,
+                      unsigned long timeout)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       unsigned long end;
+       int err = 0;
+       int ret_from_pending = 0;
+
+       /* First, verify that the master reports correct status */
+       if (comm_pending(dev)) {
+               mlx4_warn(dev, "Communication channel is not idle - my toggle is %d (cmd:0x%x)\n",
+                         priv->cmd.comm_toggle, cmd);
+               return -EAGAIN;
+       }
+
+       /* Write command */
+       down(&priv->cmd.poll_sem);
+       if (mlx4_comm_cmd_post(dev, cmd, param)) {
+               /* Only in case the device state is INTERNAL_ERROR,
+                * mlx4_comm_cmd_post returns with an error
+                */
+               err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+               goto out;
+       }
+
+       end = msecs_to_jiffies(timeout) + jiffies;
+       while (comm_pending(dev) && time_before(jiffies, end))
+               cond_resched();
+       ret_from_pending = comm_pending(dev);
+       if (ret_from_pending) {
+               /* Check if the slave is trying to boot in the middle of the
+                * FLR process.  The only non-zero result for the RESET command
+                * is MLX4_DELAY_RESET_SLAVE. */
+               if ((MLX4_COMM_CMD_RESET == cmd)) {
+                       err = MLX4_DELAY_RESET_SLAVE;
+                       goto out;
+               } else {
+                       mlx4_warn(dev, "Communication channel command 0x%x timed out\n",
+                                 cmd);
+                       err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+               }
+       }
+
+       if (err)
+               mlx4_enter_error_state(dev->persist);
+out:
+       up(&priv->cmd.poll_sem);
+       return err;
+}
+
+static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 vhcr_cmd,
+                             u16 param, u16 op, unsigned long timeout)
+{
+       struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
+       struct mlx4_cmd_context *context;
+       unsigned long end;
+       int err = 0;
+
+       down(&cmd->event_sem);
+
+       spin_lock(&cmd->context_lock);
+       BUG_ON(cmd->free_head < 0);
+       context = &cmd->context[cmd->free_head];
+       context->token += cmd->token_mask + 1;
+       cmd->free_head = context->next;
+       spin_unlock(&cmd->context_lock);
+
+       reinit_completion(&context->done);
+
+       if (mlx4_comm_cmd_post(dev, vhcr_cmd, param)) {
+               /* Only in case the device state is INTERNAL_ERROR,
+                * mlx4_comm_cmd_post returns with an error
+                */
+               err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+               goto out;
+       }
+
+       if (!wait_for_completion_timeout(&context->done,
+                                        msecs_to_jiffies(timeout))) {
+               mlx4_warn(dev, "communication channel command 0x%x (op=0x%x) timed out\n",
+                         vhcr_cmd, op);
+               goto out_reset;
+       }
+
+       err = context->result;
+       if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) {
+               mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
+                        vhcr_cmd, context->fw_status);
+               if (mlx4_closing_cmd_fatal_error(op, context->fw_status))
+                       goto out_reset;
+       }
+
+       /* Wait for the comm channel to become ready.  This is necessary to
+        * prevent a race when switching from event to polling mode.
+        * Skip this section if the device is in the FATAL_ERROR state:
+        * in that state, no commands are sent via the comm channel until
+        * the device has returned from reset.
+        */
+       if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) {
+               end = msecs_to_jiffies(timeout) + jiffies;
+               while (comm_pending(dev) && time_before(jiffies, end))
+                       cond_resched();
+       }
+       goto out;
+
+out_reset:
+       err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+       mlx4_enter_error_state(dev->persist);
+out:
+       spin_lock(&cmd->context_lock);
+       context->next = cmd->free_head;
+       cmd->free_head = context - cmd->context;
+       spin_unlock(&cmd->context_lock);
+
+       up(&cmd->event_sem);
+       return err;
+}
+
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+                 u16 op, unsigned long timeout)
+{
+       if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+               return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+
+       if (mlx4_priv(dev)->cmd.use_events)
+               return mlx4_comm_cmd_wait(dev, cmd, param, op, timeout);
+       return mlx4_comm_cmd_poll(dev, cmd, param, timeout);
+}
+
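+/* Report whether the HCR is still busy, judged from the 'go' and toggle
+ * bits in the status dword; while it is, a new command must not be
+ * posted.
+ */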
+static int cmd_pending(struct mlx4_dev *dev)
+{
+       u32 status;
+
+       if (pci_channel_offline(dev->persist->pdev))
+               return -EIO;
+
+       status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
+
+       return (status & swab32(1 << HCR_GO_BIT)) ||
+               (mlx4_priv(dev)->cmd.toggle ==
+                !!(status & swab32(1 << HCR_T_BIT)));
+}
+
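+/* Write one command into the HCR: wait for any previous command to drain
+ * (bounded by GO_BIT_TIMEOUT_MSECS in event mode), write the six
+ * parameter dwords, then the go/toggle/event/opcode dword, and flip our
+ * toggle for the next post.
+ */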
+static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
+                        u32 in_modifier, u8 op_modifier, u16 op, u16 token,
+                        int event)
+{
+       struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
+       u32 __iomem *hcr = cmd->hcr;
+       int ret = -EIO;
+       unsigned long end;
+
+       mutex_lock(&dev->persist->device_state_mutex);
+       /* To avoid writing to unknown addresses after the device state was
+        * changed to internal error and the chip was reset,
+        * check the INTERNAL_ERROR flag, which is updated under the
+        * device_state_mutex lock.
+        */
+       if (pci_channel_offline(dev->persist->pdev) ||
+           (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) {
+               /*
+                * Device is going through error recovery
+                * and cannot accept commands.
+                */
+               goto out;
+       }
+
+       end = jiffies;
+       if (event)
+               end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS);
+
+       while (cmd_pending(dev)) {
+               if (pci_channel_offline(dev->persist->pdev)) {
+                       /*
+                        * Device is going through error recovery
+                        * and cannot accept commands.
+                        */
+                       goto out;
+               }
+
+               if (time_after_eq(jiffies, end)) {
+                       mlx4_err(dev, "%s:cmd_pending failed\n", __func__);
+                       goto out;
+               }
+               cond_resched();
+       }
+
+       /*
+        * We use writel (instead of something like memcpy_toio)
+        * because writes of less than 32 bits to the HCR don't work
+        * (and some architectures such as ia64 implement memcpy_toio
+        * in terms of writeb).
+        */
+       __raw_writel((__force u32) cpu_to_be32(in_param >> 32),           hcr + 0);
+       __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful),  hcr + 1);
+       __raw_writel((__force u32) cpu_to_be32(in_modifier),              hcr + 2);
+       __raw_writel((__force u32) cpu_to_be32(out_param >> 32),          hcr + 3);
+       __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), hcr + 4);
+       __raw_writel((__force u32) cpu_to_be32(token << 16),              hcr + 5);
+
+       /* __raw_writel may not order writes. */
+       wmb();
+
+       __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT)                |
+                                              (cmd->toggle << HCR_T_BIT)       |
+                                              (event ? (1 << HCR_E_BIT) : 0)   |
+                                              (op_modifier << HCR_OPMOD_SHIFT) |
+                                              op), hcr + 6);
+
+       /*
+        * Make sure that our HCR writes don't get mixed in with
+        * writes from another CPU starting a FW command.
+        */
+       mmiowb();
+
+       cmd->toggle = cmd->toggle ^ 1;
+
+       ret = 0;
+
+out:
+       if (ret)
+               mlx4_warn(dev, "Could not post command 0x%x: ret=%d, in_param=0x%llx, in_mod=0x%x, op_mod=0x%x\n",
+                         op, ret, in_param, in_modifier, op_modifier);
+       mutex_unlock(&dev->persist->device_state_mutex);
+
+       return ret;
+}
+
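+/* VF/wrapped command path: fill in the shared vHCR, then either process
+ * it directly (when the master issues a wrapped command to itself) or
+ * signal the master with a VHCR_POST over the comm channel.
+ */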
+static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+                         int out_is_imm, u32 in_modifier, u8 op_modifier,
+                         u16 op, unsigned long timeout)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr;
+       int ret;
+
+       mutex_lock(&priv->cmd.slave_cmd_mutex);
+
+       vhcr->in_param = cpu_to_be64(in_param);
+       vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0;
+       vhcr->in_modifier = cpu_to_be32(in_modifier);
+       vhcr->opcode = cpu_to_be16((((u16) op_modifier) << 12) | (op & 0xfff));
+       vhcr->token = cpu_to_be16(CMD_POLL_TOKEN);
+       vhcr->status = 0;
+       vhcr->flags = !!(priv->cmd.use_events) << 6;
+
+       if (mlx4_is_master(dev)) {
+               ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr);
+               if (!ret) {
+                       if (out_is_imm) {
+                               if (out_param)
+                                       *out_param =
+                                               be64_to_cpu(vhcr->out_param);
+                               else {
+                                       mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+                                                op);
+                                       vhcr->status = CMD_STAT_BAD_PARAM;
+                               }
+                       }
+                       ret = mlx4_status_to_errno(vhcr->status);
+               }
+               if (ret &&
+                   dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+                       ret = mlx4_internal_err_ret_value(dev, op, op_modifier);
+       } else {
+               ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, op,
+                                   MLX4_COMM_TIME + timeout);
+               if (!ret) {
+                       if (out_is_imm) {
+                               if (out_param)
+                                       *out_param =
+                                               be64_to_cpu(vhcr->out_param);
+                               else {
+                                       mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+                                                op);
+                                       vhcr->status = CMD_STAT_BAD_PARAM;
+                               }
+                       }
+                       ret = mlx4_status_to_errno(vhcr->status);
+               } else {
+                       if (dev->persist->state &
+                           MLX4_DEVICE_STATE_INTERNAL_ERROR)
+                               ret = mlx4_internal_err_ret_value(dev, op,
+                                                                 op_modifier);
+                       else
+                               mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", op);
+               }
+       }
+
+       mutex_unlock(&priv->cmd.slave_cmd_mutex);
+       return ret;
+}
+
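+/* Polling flavor of a native HCR command: post it with the poll token,
+ * busy-wait until the go bit clears or the timeout expires, then read
+ * the status -- and the immediate out_param, if any -- straight from the
+ * HCR registers.
+ */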
+static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+                        int out_is_imm, u32 in_modifier, u8 op_modifier,
+                        u16 op, unsigned long timeout)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       void __iomem *hcr = priv->cmd.hcr;
+       int err = 0;
+       unsigned long end;
+       u32 stat;
+
+       down(&priv->cmd.poll_sem);
+
+       if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+               /*
+                * Device is going through error recovery
+                * and cannot accept commands.
+                */
+               err = mlx4_internal_err_ret_value(dev, op, op_modifier);
+               goto out;
+       }
+
+       if (out_is_imm && !out_param) {
+               mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+                        op);
+               err = -EINVAL;
+               goto out;
+       }
+
+       err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
+                           in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0);
+       if (err)
+               goto out_reset;
+
+       end = msecs_to_jiffies(timeout) + jiffies;
+       while (cmd_pending(dev) && time_before(jiffies, end)) {
+               if (pci_channel_offline(dev->persist->pdev)) {
+                       /*
+                        * Device is going through error recovery
+                        * and cannot accept commands.
+                        */
+                       err = -EIO;
+                       goto out_reset;
+               }
+
+               if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+                       err = mlx4_internal_err_ret_value(dev, op, op_modifier);
+                       goto out;
+               }
+
+               cond_resched();
+       }
+
+       if (cmd_pending(dev)) {
+               mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n",
+                         op);
+               err = -EIO;
+               goto out_reset;
+       }
+
+       if (out_is_imm)
+               *out_param =
+                       (u64) be32_to_cpu((__force __be32)
+                                         __raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
+                       (u64) be32_to_cpu((__force __be32)
+                                         __raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4));
+       stat = be32_to_cpu((__force __be32)
+                          __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24;
+       err = mlx4_status_to_errno(stat);
+       if (err) {
+               mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
+                        op, stat);
+               if (mlx4_closing_cmd_fatal_error(op, stat))
+                       goto out_reset;
+               goto out;
+       }
+
+out_reset:
+       if (err)
+               err = mlx4_cmd_reset_flow(dev, op, op_modifier, err);
+out:
+       up(&priv->cmd.poll_sem);
+       return err;
+}
+
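+/* Completion handler for event mode: look up the waiter's context by
+ * token, record status and out_param, and wake the waiter.
+ */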
+void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_cmd_context *context =
+               &priv->cmd.context[token & priv->cmd.token_mask];
+
+       /* previously timed out command completing at long last */
+       if (token != context->token)
+               return;
+
+       context->fw_status = status;
+       context->result    = mlx4_status_to_errno(status);
+       context->out_param = out_param;
+
+       complete(&context->done);
+}
+
+static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+                        int out_is_imm, u32 in_modifier, u8 op_modifier,
+                        u16 op, unsigned long timeout)
+{
+       struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
+       struct mlx4_cmd_context *context;
+       int err = 0;
+
+       down(&cmd->event_sem);
+
+       spin_lock(&cmd->context_lock);
+       BUG_ON(cmd->free_head < 0);
+       context = &cmd->context[cmd->free_head];
+       context->token += cmd->token_mask + 1;
+       cmd->free_head = context->next;
+       spin_unlock(&cmd->context_lock);
+
+       if (out_is_imm && !out_param) {
+               mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+                        op);
+               err = -EINVAL;
+               goto out;
+       }
+
+       reinit_completion(&context->done);
+
+       err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
+                           in_modifier, op_modifier, op, context->token, 1);
+       if (err)
+               goto out_reset;
+
+       if (!wait_for_completion_timeout(&context->done,
+                                        msecs_to_jiffies(timeout))) {
+               mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n",
+                         op);
+               if (op == MLX4_CMD_NOP) {
+                       err = -EBUSY;
+                       goto out;
+               } else {
+                       err = -EIO;
+                       goto out_reset;
+               }
+       }
+
+       err = context->result;
+       if (err) {
+               /* Since we do not want this error message always displayed
+                * at driver start when there are ConnectX2 HCAs on the
+                * host, we demote the message for this specific
+                * command/input_mod/opcode_mod/fw-status to debug level.
+                */
+               if (op == MLX4_CMD_SET_PORT &&
+                   (in_modifier == 1 || in_modifier == 2) &&
+                   op_modifier == MLX4_SET_PORT_IB_OPCODE &&
+                   context->fw_status == CMD_STAT_BAD_SIZE)
+                       mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n",
+                                op, context->fw_status);
+               else
+                       mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
+                                op, context->fw_status);
+               if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+                       err = mlx4_internal_err_ret_value(dev, op, op_modifier);
+               else if (mlx4_closing_cmd_fatal_error(op, context->fw_status))
+                       goto out_reset;
+
+               goto out;
+       }
+
+       if (out_is_imm)
+               *out_param = context->out_param;
+
+out_reset:
+       if (err)
+               err = mlx4_cmd_reset_flow(dev, op, op_modifier, err);
+out:
+       spin_lock(&cmd->context_lock);
+       context->next = cmd->free_head;
+       cmd->free_head = context - cmd->context;
+       spin_unlock(&cmd->context_lock);
+
+       up(&cmd->event_sem);
+       return err;
+}
+
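+/* Common entry point for firmware commands, used by the mlx4_cmd()/
+ * mlx4_cmd_box()/mlx4_cmd_imm() helpers (see <linux/mlx4/cmd.h>).
+ * Native commands from a non-mfunc device or the master go straight to
+ * the HCR, in event or polling mode; everything else takes the
+ * slave/vHCR path.
+ */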
+int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+              int out_is_imm, u32 in_modifier, u8 op_modifier,
+              u16 op, unsigned long timeout, int native)
+{
+       if (pci_channel_offline(dev->persist->pdev))
+               return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO);
+
+       if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) {
+               if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+                       return mlx4_internal_err_ret_value(dev, op,
+                                                         op_modifier);
+               if (mlx4_priv(dev)->cmd.use_events)
+                       return mlx4_cmd_wait(dev, in_param, out_param,
+                                            out_is_imm, in_modifier,
+                                            op_modifier, op, timeout);
+               else
+                       return mlx4_cmd_poll(dev, in_param, out_param,
+                                            out_is_imm, in_modifier,
+                                            op_modifier, op, timeout);
+       }
+       return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm,
+                             in_modifier, op_modifier, op, timeout);
+}
+EXPORT_SYMBOL_GPL(__mlx4_cmd);
+
+int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
+{
+       return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL,
+                       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+}
+
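+/* DMA a buffer between master and slave memory; used below to shuttle
+ * the slave's vHCR and command mailboxes.  Per the parameter checks,
+ * both addresses must be 4K-aligned and the size a multiple of 256.
+ */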
+static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr,
+                          int slave, u64 slave_addr,
+                          int size, int is_read)
+{
+       u64 in_param;
+       u64 out_param;
+
+       if ((slave_addr & 0xfff) | (master_addr & 0xfff) |
+           (slave & ~0x7f) | (size & 0xff)) {
+               mlx4_err(dev, "Bad access mem params - slave_addr:0x%llx master_addr:0x%llx slave_id:%d size:%d\n",
+                        slave_addr, master_addr, slave, size);
+               return -EINVAL;
+       }
+
+       if (is_read) {
+               in_param = (u64) slave | slave_addr;
+               out_param = (u64) dev->caps.function | master_addr;
+       } else {
+               in_param = (u64) dev->caps.function | master_addr;
+               out_param = (u64) slave | slave_addr;
+       }
+
+       return mlx4_cmd_imm(dev, in_param, &out_param, size, 0,
+                           MLX4_CMD_ACCESS_MEM,
+                           MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+
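+/* Fetch one 32-entry block of the physical pkey table via MAD_IFC;
+ * 'index' must be a multiple of 32 and selects the block.
+ */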
+static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey,
+                              struct mlx4_cmd_mailbox *inbox,
+                              struct mlx4_cmd_mailbox *outbox)
+{
+       struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf);
+       struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf);
+       int err;
+       int i;
+
+       if (index & 0x1f)
+               return -EINVAL;
+
+       in_mad->attr_mod = cpu_to_be32(index / 32);
+
+       err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
+                          MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+                          MLX4_CMD_NATIVE);
+       if (err)
+               return err;
+
+       for (i = 0; i < 32; ++i)
+               pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]);
+
+       return err;
+}
+
+static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table,
+                              struct mlx4_cmd_mailbox *inbox,
+                              struct mlx4_cmd_mailbox *outbox)
+{
+       int i;
+       int err;
+
+       for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) {
+               err = query_pkey_block(dev, port, i, table + i, inbox, outbox);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+#define PORT_CAPABILITY_LOCATION_IN_SMP 20
+#define PORT_STATE_OFFSET 32
+
+static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf)
+{
+       if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP)
+               return IB_PORT_ACTIVE;
+       else
+               return IB_PORT_DOWN;
+}
+
+static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
+                               struct mlx4_vhcr *vhcr,
+                               struct mlx4_cmd_mailbox *inbox,
+                               struct mlx4_cmd_mailbox *outbox,
+                               struct mlx4_cmd_info *cmd)
+{
+       struct ib_smp *smp = inbox->buf;
+       u32 index;
+       u8 port;
+       u8 opcode_modifier;
+       u16 *table;
+       int err;
+       int vidx, pidx;
+       int network_view;
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct ib_smp *outsmp = outbox->buf;
+       __be16 *outtab = (__be16 *)(outsmp->data);
+       __be32 slave_cap_mask;
+       __be64 slave_node_guid;
+
+       port = vhcr->in_modifier;
+
+       /* network-view bit is for driver use only, and should not be passed to FW */
+       opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */
+       network_view = !!(vhcr->op_modifier & 0x8);
+
+       if (smp->base_version == 1 &&
+           smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+           smp->class_version == 1) {
+               /* host view is paravirtualized */
+               if (!network_view && smp->method == IB_MGMT_METHOD_GET) {
+                       if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+                               index = be32_to_cpu(smp->attr_mod);
+                               if (port < 1 || port > dev->caps.num_ports)
+                                       return -EINVAL;
+                               table = kcalloc((dev->caps.pkey_table_len[port] / 32) + 1,
+                                               sizeof(*table) * 32, GFP_KERNEL);
+
+                               if (!table)
+                                       return -ENOMEM;
+                               /* need to get the full pkey table because the paravirtualized
+                                * pkeys may be scattered among several pkey blocks.
+                                */
+                               err = get_full_pkey_table(dev, port, table, inbox, outbox);
+                               if (!err) {
+                                       for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) {
+                                               pidx = priv->virt2phys_pkey[slave][port - 1][vidx];
+                                               outtab[vidx % 32] = cpu_to_be16(table[pidx]);
+                                       }
+                               }
+                               kfree(table);
+                               return err;
+                       }
+                       if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) {
+                               /* Get the slave-specific caps: run the
+                                * command, then patch the response for
+                                * slaves below.
+                                */
+                               err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+                                           vhcr->in_modifier, opcode_modifier,
+                                           vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+                               /* modify the response for slaves */
+                               if (!err && slave != mlx4_master_func_num(dev)) {
+                                       u8 *state = outsmp->data + PORT_STATE_OFFSET;
+
+                                       *state = (*state & 0xf0) | vf_port_state(dev, port, slave);
+                                       slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
+                                       memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4);
+                               }
+                               return err;
+                       }
+                       if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
+                               __be64 guid = mlx4_get_admin_guid(dev, slave,
+                                                                 port);
+
+                               /* set the PF admin guid to the FW/HW burned
+                                * GUID, if it wasn't yet set
+                                */
+                               if (slave == 0 && guid == 0) {
+                                       smp->attr_mod = 0;
+                                       err = mlx4_cmd_box(dev,
+                                                          inbox->dma,
+                                                          outbox->dma,
+                                                          vhcr->in_modifier,
+                                                          opcode_modifier,
+                                                          vhcr->op,
+                                                          MLX4_CMD_TIME_CLASS_C,
+                                                          MLX4_CMD_NATIVE);
+                                       if (err)
+                                               return err;
+                                       mlx4_set_admin_guid(dev,
+                                                           *(__be64 *)outsmp->
+                                                           data, slave, port);
+                               } else {
+                                       memcpy(outsmp->data, &guid, 8);
+                               }
+
+                               /* clean all other gids */
+                               memset(outsmp->data + 8, 0, 56);
+                               return 0;
+                       }
+                       if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
+                               err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+                                            vhcr->in_modifier, opcode_modifier,
+                                            vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+                               if (!err) {
+                                       slave_node_guid =  mlx4_get_slave_node_guid(dev, slave);
+                                       memcpy(outsmp->data + 12, &slave_node_guid, 8);
+                               }
+                               return err;
+                       }
+               }
+       }
+
+       /* Non-privileged VFs are only allowed "host" view LID-routed 'Get' MADs.
+        * These are the MADs used by ib verbs (such as ib_query_gids).
+        */
+       if (slave != mlx4_master_func_num(dev) &&
+           !mlx4_vf_smi_enabled(dev, slave, port)) {
+               if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+                     smp->method == IB_MGMT_METHOD_GET) || network_view) {
+                       mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n",
+                                slave, smp->method, smp->mgmt_class,
+                                network_view ? "Network" : "Host",
+                                be16_to_cpu(smp->attr_id));
+                       return -EPERM;
+               }
+       }
+
+       return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+                                   vhcr->in_modifier, opcode_modifier,
+                                   vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+}
+
+static int mlx4_CMD_EPERM_wrapper(struct mlx4_dev *dev, int slave,
+                    struct mlx4_vhcr *vhcr,
+                    struct mlx4_cmd_mailbox *inbox,
+                    struct mlx4_cmd_mailbox *outbox,
+                    struct mlx4_cmd_info *cmd)
+{
+       return -EPERM;
+}
+
+int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
+                    struct mlx4_vhcr *vhcr,
+                    struct mlx4_cmd_mailbox *inbox,
+                    struct mlx4_cmd_mailbox *outbox,
+                    struct mlx4_cmd_info *cmd)
+{
+       u64 in_param;
+       u64 out_param;
+       int err;
+
+       in_param = cmd->has_inbox ? (u64) inbox->dma : vhcr->in_param;
+       out_param = cmd->has_outbox ? (u64) outbox->dma : vhcr->out_param;
+       if (cmd->encode_slave_id) {
+               in_param &= 0xffffffffffffff00ll;
+               in_param |= slave;
+       }
+
+       err = __mlx4_cmd(dev, in_param, &out_param, cmd->out_is_imm,
+                        vhcr->in_modifier, vhcr->op_modifier, vhcr->op,
+                        MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+       if (cmd->out_is_imm)
+               vhcr->out_param = out_param;
+
+       return err;
+}
+
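+/* Dispatch table for commands the master executes on behalf of slaves:
+ * whether the command carries inbox/outbox mailboxes, whether the result
+ * is an immediate 64-bit value, whether the slave id must be encoded
+ * into in_param, plus optional verify (permission/bounds) and
+ * paravirtualization wrapper hooks.
+ */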
+static struct mlx4_cmd_info cmd_info[] = {
+       {
+               .opcode = MLX4_CMD_QUERY_FW,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QUERY_FW_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_HCA,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = NULL
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_DEV_CAP,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QUERY_DEV_CAP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_FUNC_CAP,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QUERY_FUNC_CAP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_ADAPTER,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = NULL
+       },
+       {
+               .opcode = MLX4_CMD_INIT_PORT,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_INIT_PORT_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_CLOSE_PORT,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm  = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_CLOSE_PORT_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_PORT,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QUERY_PORT_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SET_PORT,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_SET_PORT_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_MAP_EQ,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_MAP_EQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SW2HW_EQ,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = true,
+               .verify = NULL,
+               .wrapper = mlx4_SW2HW_EQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_HW_HEALTH_CHECK,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = NULL
+       },
+       {
+               .opcode = MLX4_CMD_NOP,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = NULL
+       },
+       {
+               .opcode = MLX4_CMD_CONFIG_DEV,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_CONFIG_DEV_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_ALLOC_RES,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = true,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_ALLOC_RES_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_FREE_RES,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_FREE_RES_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SW2HW_MPT,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = true,
+               .verify = NULL,
+               .wrapper = mlx4_SW2HW_MPT_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_MPT,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QUERY_MPT_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_HW2SW_MPT,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_HW2SW_MPT_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_READ_MTT,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = NULL
+       },
+       {
+               .opcode = MLX4_CMD_WRITE_MTT,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_WRITE_MTT_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SYNC_TPT,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = NULL
+       },
+       {
+               .opcode = MLX4_CMD_HW2SW_EQ,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = true,
+               .verify = NULL,
+               .wrapper = mlx4_HW2SW_EQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_EQ,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = true,
+               .verify = NULL,
+               .wrapper = mlx4_QUERY_EQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SW2HW_CQ,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = true,
+               .verify = NULL,
+               .wrapper = mlx4_SW2HW_CQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_HW2SW_CQ,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_HW2SW_CQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_CQ,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QUERY_CQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_MODIFY_CQ,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = true,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_MODIFY_CQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SW2HW_SRQ,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = true,
+               .verify = NULL,
+               .wrapper = mlx4_SW2HW_SRQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_HW2SW_SRQ,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_HW2SW_SRQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_SRQ,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QUERY_SRQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_ARM_SRQ,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_ARM_SRQ_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_RST2INIT_QP,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = true,
+               .verify = NULL,
+               .wrapper = mlx4_RST2INIT_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_INIT2INIT_QP,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_INIT2INIT_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_INIT2RTR_QP,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_INIT2RTR_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_RTR2RTS_QP,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_RTR2RTS_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_RTS2RTS_QP,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_RTS2RTS_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SQERR2RTS_QP,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_SQERR2RTS_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_2ERR_QP,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_GEN_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_RTS2SQD_QP,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_GEN_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SQD2SQD_QP,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_SQD2SQD_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SQD2RTS_QP,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_SQD2RTS_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_2RST_QP,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_2RST_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_QP,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_GEN_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SUSPEND_QP,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_GEN_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_UNSUSPEND_QP,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_GEN_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_UPDATE_QP,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_UPDATE_QP_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_GET_OP_REQ,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_CMD_EPERM_wrapper,
+       },
+       {
+               .opcode = MLX4_CMD_ALLOCATE_VPP,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_CMD_EPERM_wrapper,
+       },
+       {
+               .opcode = MLX4_CMD_SET_VPORT_QOS,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_CMD_EPERM_wrapper,
+       },
+       {
+               .opcode = MLX4_CMD_CONF_SPECIAL_QP,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL, /* XXX verify: only demux can do this */
+               .wrapper = NULL
+       },
+       {
+               .opcode = MLX4_CMD_MAD_IFC,
+               .has_inbox = true,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_MAD_IFC_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_MAD_DEMUX,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_CMD_EPERM_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_QUERY_IF_STAT,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QUERY_IF_STAT_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_ACCESS_REG,
+               .has_inbox = true,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_ACCESS_REG_wrapper,
+       },
+       {
+               .opcode = MLX4_CMD_CONGESTION_CTRL_OPCODE,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_CMD_EPERM_wrapper,
+       },
+       /* Native multicast commands are not available for guests */
+       {
+               .opcode = MLX4_CMD_QP_ATTACH,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QP_ATTACH_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_PROMISC,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_PROMISC_wrapper
+       },
+       /* Ethernet specific commands */
+       {
+               .opcode = MLX4_CMD_SET_VLAN_FLTR,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_SET_VLAN_FLTR_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_SET_MCAST_FLTR,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_SET_MCAST_FLTR_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_DUMP_ETH_STATS,
+               .has_inbox = false,
+               .has_outbox = true,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_DUMP_ETH_STATS_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_INFORM_FLR_DONE,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = NULL
+       },
+       /* flow steering commands */
+       {
+               .opcode = MLX4_QP_FLOW_STEERING_ATTACH,
+               .has_inbox = true,
+               .has_outbox = false,
+               .out_is_imm = true,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QP_FLOW_STEERING_ATTACH_wrapper
+       },
+       {
+               .opcode = MLX4_QP_FLOW_STEERING_DETACH,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper
+       },
+       {
+               .opcode = MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_CMD_EPERM_wrapper
+       },
+       {
+               .opcode = MLX4_CMD_VIRT_PORT_MAP,
+               .has_inbox = false,
+               .has_outbox = false,
+               .out_is_imm = false,
+               .encode_slave_id = false,
+               .verify = NULL,
+               .wrapper = mlx4_CMD_EPERM_wrapper
+       },
+};
+
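+/* Master-side execution of one slave command: DMA in the slave's vHCR,
+ * look the opcode up in cmd_info[], DMA in the inbox (if any), run the
+ * verify hook, execute via the wrapper or directly on the HCR, then DMA
+ * the outbox and final status back to the slave.
+ */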
+static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
+                                   struct mlx4_vhcr_cmd *in_vhcr)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_cmd_info *cmd = NULL;
+       struct mlx4_vhcr_cmd *vhcr_cmd = in_vhcr ? in_vhcr : priv->mfunc.vhcr;
+       struct mlx4_vhcr *vhcr;
+       struct mlx4_cmd_mailbox *inbox = NULL;
+       struct mlx4_cmd_mailbox *outbox = NULL;
+       u64 in_param;
+       u64 out_param;
+       int ret = 0;
+       int i;
+       int err = 0;
+
+       /* Create sw representation of Virtual HCR */
+       vhcr = kzalloc(sizeof(struct mlx4_vhcr), GFP_KERNEL);
+       if (!vhcr)
+               return -ENOMEM;
+
+       /* DMA in the vHCR */
+       if (!in_vhcr) {
+               ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave,
+                                     priv->mfunc.master.slave_state[slave].vhcr_dma,
+                                     ALIGN(sizeof(struct mlx4_vhcr_cmd),
+                                           MLX4_ACCESS_MEM_ALIGN), 1);
+               if (ret) {
+                       if (!(dev->persist->state &
+                           MLX4_DEVICE_STATE_INTERNAL_ERROR))
+                               mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n",
+                                        __func__, ret);
+                       kfree(vhcr);
+                       return ret;
+               }
+       }
+
+       /* Fill SW VHCR fields */
+       vhcr->in_param = be64_to_cpu(vhcr_cmd->in_param);
+       vhcr->out_param = be64_to_cpu(vhcr_cmd->out_param);
+       vhcr->in_modifier = be32_to_cpu(vhcr_cmd->in_modifier);
+       vhcr->token = be16_to_cpu(vhcr_cmd->token);
+       vhcr->op = be16_to_cpu(vhcr_cmd->opcode) & 0xfff;
+       vhcr->op_modifier = (u8) (be16_to_cpu(vhcr_cmd->opcode) >> 12);
+       vhcr->e_bit = vhcr_cmd->flags & (1 << 6);
+
+       /* Lookup command */
+       for (i = 0; i < ARRAY_SIZE(cmd_info); ++i) {
+               if (vhcr->op == cmd_info[i].opcode) {
+                       cmd = &cmd_info[i];
+                       break;
+               }
+       }
+       if (!cmd) {
+               mlx4_err(dev, "Unknown command:0x%x accepted from slave:%d\n",
+                        vhcr->op, slave);
+               vhcr_cmd->status = CMD_STAT_BAD_PARAM;
+               goto out_status;
+       }
+
+       /* Read inbox */
+       if (cmd->has_inbox) {
+               vhcr->in_param &= INBOX_MASK;
+               inbox = mlx4_alloc_cmd_mailbox(dev);
+               if (IS_ERR(inbox)) {
+                       vhcr_cmd->status = CMD_STAT_BAD_SIZE;
+                       inbox = NULL;
+                       goto out_status;
+               }
+
+               ret = mlx4_ACCESS_MEM(dev, inbox->dma, slave,
+                                     vhcr->in_param,
+                                     MLX4_MAILBOX_SIZE, 1);
+               if (ret) {
+                       if (!(dev->persist->state &
+                           MLX4_DEVICE_STATE_INTERNAL_ERROR))
+                               mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n",
+                                        __func__, cmd->opcode);
+                       vhcr_cmd->status = CMD_STAT_INTERNAL_ERR;
+                       goto out_status;
+               }
+       }
+
+       /* Apply permission and bound checks if applicable */
+       if (cmd->verify && cmd->verify(dev, slave, vhcr, inbox)) {
+               mlx4_warn(dev, "Command:0x%x from slave: %d failed protection checks for resource_id:%d\n",
+                         vhcr->op, slave, vhcr->in_modifier);
+               vhcr_cmd->status = CMD_STAT_BAD_OP;
+               goto out_status;
+       }
+
+       /* Allocate outbox */
+       if (cmd->has_outbox) {
+               outbox = mlx4_alloc_cmd_mailbox(dev);
+               if (IS_ERR(outbox)) {
+                       vhcr_cmd->status = CMD_STAT_BAD_SIZE;
+                       outbox = NULL;
+                       goto out_status;
+               }
+       }
+
+       /* Execute the command! */
+       if (cmd->wrapper) {
+               err = cmd->wrapper(dev, slave, vhcr, inbox, outbox,
+                                  cmd);
+               if (cmd->out_is_imm)
+                       vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param);
+       } else {
+               in_param = cmd->has_inbox ? (u64) inbox->dma :
+                       vhcr->in_param;
+               out_param = cmd->has_outbox ? (u64) outbox->dma :
+                       vhcr->out_param;
+               err = __mlx4_cmd(dev, in_param, &out_param,
+                                cmd->out_is_imm, vhcr->in_modifier,
+                                vhcr->op_modifier, vhcr->op,
+                                MLX4_CMD_TIME_CLASS_A,
+                                MLX4_CMD_NATIVE);
+
+               if (cmd->out_is_imm) {
+                       vhcr->out_param = out_param;
+                       vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param);
+               }
+       }
+
+       if (err) {
+               if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR))
+                       mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n",
+                                 vhcr->op, slave, vhcr->errno, err);
+               vhcr_cmd->status = mlx4_errno_to_status(err);
+               goto out_status;
+       }
+
+       /* Write outbox if command completed successfully */
+       if (cmd->has_outbox && !vhcr_cmd->status) {
+               ret = mlx4_ACCESS_MEM(dev, outbox->dma, slave,
+                                     vhcr->out_param,
+                                     MLX4_MAILBOX_SIZE, MLX4_CMD_WRAPPED);
+               if (ret) {
+                       /* If we failed to write back the outbox after the
+                        * command was successfully executed, we must fail
+                        * this slave, as it is now in an undefined state.
+                        */
+                       if (!(dev->persist->state &
+                           MLX4_DEVICE_STATE_INTERNAL_ERROR))
+                               mlx4_err(dev, "%s:Failed writing outbox\n", __func__);
+                       goto out;
+               }
+       }
+
+out_status:
+       /* DMA back vhcr result */
+       if (!in_vhcr) {
+               ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave,
+                                     priv->mfunc.master.slave_state[slave].vhcr_dma,
+                                     ALIGN(sizeof(struct mlx4_vhcr),
+                                           MLX4_ACCESS_MEM_ALIGN),
+                                     MLX4_CMD_WRAPPED);
+               if (ret)
+                       mlx4_err(dev, "%s:Failed writing vhcr result\n",
+                                __func__);
+               else if (vhcr->e_bit &&
+                        mlx4_GEN_EQE(dev, slave, &priv->mfunc.master.cmd_eqe))
+                               mlx4_warn(dev, "Failed to generate command completion eqe for slave %d\n",
+                                         slave);
+       }
+
+out:
+       kfree(vhcr);
+       mlx4_free_cmd_mailbox(dev, inbox);
+       mlx4_free_cmd_mailbox(dev, outbox);
+       return ret;
+}
+
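+/* Apply a VF's admin vlan/QoS/link-state settings to its operational
+ * state right away.  When something changed and UPDATE_QP is supported,
+ * queue work that walks the slave's QPs and updates them in place.
+ */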
+static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv,
+                                           int slave, int port)
+{
+       struct mlx4_vport_oper_state *vp_oper;
+       struct mlx4_vport_state *vp_admin;
+       struct mlx4_vf_immed_vlan_work *work;
+       struct mlx4_dev *dev = &(priv->dev);
+       int err;
+       int admin_vlan_ix = NO_INDX;
+
+       vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+       vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+       if (vp_oper->state.default_vlan == vp_admin->default_vlan &&
+           vp_oper->state.default_qos == vp_admin->default_qos &&
+           vp_oper->state.link_state == vp_admin->link_state &&
+           vp_oper->state.qos_vport == vp_admin->qos_vport)
+               return 0;
+
+       if (!(priv->mfunc.master.slave_state[slave].active &&
+             dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP)) {
+               /* even if the UPDATE_QP command isn't supported, we still want
+                * to set this VF link according to the admin directive
+                */
+               vp_oper->state.link_state = vp_admin->link_state;
+               return -1;
+       }
+
+       mlx4_dbg(dev, "updating immediately admin params slave %d port %d\n",
+                slave, port);
+       mlx4_dbg(dev, "vlan %d QoS %d link down %d\n",
+                vp_admin->default_vlan, vp_admin->default_qos,
+                vp_admin->link_state);
+
+       work = kzalloc(sizeof(*work), GFP_KERNEL);
+       if (!work)
+               return -ENOMEM;
+
+       if (vp_oper->state.default_vlan != vp_admin->default_vlan) {
+               if (MLX4_VGT != vp_admin->default_vlan) {
+                       err = __mlx4_register_vlan(&priv->dev, port,
+                                                  vp_admin->default_vlan,
+                                                  &admin_vlan_ix);
+                       if (err) {
+                               kfree(work);
+                               mlx4_warn(&priv->dev,
+                                         "No vlan resources slave %d, port %d\n",
+                                         slave, port);
+                               return err;
+                       }
+               } else {
+                       admin_vlan_ix = NO_INDX;
+               }
+               work->flags |= MLX4_VF_IMMED_VLAN_FLAG_VLAN;
+               mlx4_dbg(&priv->dev,
+                        "alloc vlan %d idx %d slave %d port %d\n",
+                        (int)(vp_admin->default_vlan),
+                        admin_vlan_ix, slave, port);
+       }
+
+       /* save original vlan ix and vlan id */
+       work->orig_vlan_id = vp_oper->state.default_vlan;
+       work->orig_vlan_ix = vp_oper->vlan_idx;
+
+       /* handle new qos */
+       if (vp_oper->state.default_qos != vp_admin->default_qos)
+               work->flags |= MLX4_VF_IMMED_VLAN_FLAG_QOS;
+
+       if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN)
+               vp_oper->vlan_idx = admin_vlan_ix;
+
+       vp_oper->state.default_vlan = vp_admin->default_vlan;
+       vp_oper->state.default_qos = vp_admin->default_qos;
+       vp_oper->state.link_state = vp_admin->link_state;
+       vp_oper->state.qos_vport = vp_admin->qos_vport;
+
+       if (vp_admin->link_state == IFLA_VF_LINK_STATE_DISABLE)
+               work->flags |= MLX4_VF_IMMED_VLAN_FLAG_LINK_DISABLE;
+
+       /* iterate over QPs owned by this slave, using UPDATE_QP */
+       work->port = port;
+       work->slave = slave;
+       work->qos = vp_oper->state.default_qos;
+       work->qos_vport = vp_oper->state.qos_vport;
+       work->vlan_id = vp_oper->state.default_vlan;
+       work->vlan_ix = vp_oper->vlan_idx;
+       work->priv = priv;
+       INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler);
+       queue_work(priv->mfunc.master.comm_wq, &work->work);
+
+       return 0;
+}
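+
+/* Applying a new VLAN/QoS to a running VF means issuing UPDATE_QP on every
+ * QP the slave owns, which is too heavy to do synchronously here; the
+ * function above therefore stages the parameters in a
+ * struct mlx4_vf_immed_vlan_work and defers the QP walk to the comm
+ * workqueue.  The -1 return (UPDATE_QP unsupported) lets callers such as
+ * mlx4_set_vf_vlan() report that the change only takes effect after the VF
+ * restarts.
+ */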
+
+static void mlx4_set_default_port_qos(struct mlx4_dev *dev, int port)
+{
+       struct mlx4_qos_manager *port_qos_ctl;
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       port_qos_ctl = &priv->mfunc.master.qos_ctl[port];
+       bitmap_zero(port_qos_ctl->priority_bm, MLX4_NUM_UP);
+
+       /* Enable only default prio at PF init routine */
+       set_bit(MLX4_DEFAULT_QOS_PRIO, port_qos_ctl->priority_bm);
+}
+
+static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port)
+{
+       int i;
+       int err;
+       int num_vfs;
+       u16 available_vpp;
+       u8 vpp_param[MLX4_NUM_UP];
+       struct mlx4_qos_manager *port_qos;
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param);
+       if (err) {
+               mlx4_info(dev, "Failed to query available VPPs\n");
+               return;
+       }
+
+       port_qos = &priv->mfunc.master.qos_ctl[port];
+       num_vfs = (available_vpp /
+                  bitmap_weight(port_qos->priority_bm, MLX4_NUM_UP));
+
+       for (i = 0; i < MLX4_NUM_UP; i++) {
+               if (test_bit(i, port_qos->priority_bm))
+                       vpp_param[i] = num_vfs;
+       }
+
+       err = mlx4_ALLOCATE_VPP_set(dev, port, vpp_param);
+       if (err) {
+               mlx4_info(dev, "Failed allocating VPPs\n");
+               return;
+       }
+
+       /* Query the actually allocated VPPs, just to make sure */
+       err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param);
+       if (err) {
+               mlx4_info(dev, "Failed to query available VPPs\n");
+               return;
+       }
+
+       port_qos->num_of_qos_vfs = num_vfs;
+       mlx4_dbg(dev, "Port %d: available VPPs %d\n", port, available_vpp);
+
+       for (i = 0; i < MLX4_NUM_UP; i++)
+               mlx4_dbg(dev, "Port %d UP %d allocated %d VPPs\n", port, i,
+                        vpp_param[i]);
+}
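+
+/* The firmware exposes a fixed pool of VPPs per port, and the code above
+ * splits it evenly across the priorities enabled in priority_bm.  Since
+ * only MLX4_DEFAULT_QOS_PRIO is enabled at PF init (see
+ * mlx4_set_default_port_qos() above), bitmap_weight() is 1 there, so -- to
+ * pick an illustrative number -- 63 available VPPs would give
+ * num_of_qos_vfs = 63, with each enabled priority programmed to that
+ * per-VF count.
+ */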
+
+static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave)
+{
+       int port, err;
+       struct mlx4_vport_state *vp_admin;
+       struct mlx4_vport_oper_state *vp_oper;
+       struct mlx4_active_ports actv_ports = mlx4_get_active_ports(
+                       &priv->dev, slave);
+       int min_port = find_first_bit(actv_ports.ports,
+                                     priv->dev.caps.num_ports) + 1;
+       int max_port = min_port - 1 +
+               bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports);
+
+       for (port = min_port; port <= max_port; port++) {
+               if (!test_bit(port - 1, actv_ports.ports))
+                       continue;
+               priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+                       priv->mfunc.master.vf_admin[slave].enable_smi[port];
+               vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+               vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+               vp_oper->state = *vp_admin;
+               if (MLX4_VGT != vp_admin->default_vlan) {
+                       err = __mlx4_register_vlan(&priv->dev, port,
+                                                  vp_admin->default_vlan, &(vp_oper->vlan_idx));
+                       if (err) {
+                               vp_oper->vlan_idx = NO_INDX;
+                               mlx4_warn(&priv->dev,
+                                         "No vlan resources slave %d, port %d\n",
+                                         slave, port);
+                               return err;
+                       }
+                       mlx4_dbg(&priv->dev, "alloc vlan %d idx %d slave %d port %d\n",
+                                (int)(vp_oper->state.default_vlan),
+                                vp_oper->vlan_idx, slave, port);
+               }
+               if (vp_admin->spoofchk) {
+                       vp_oper->mac_idx = __mlx4_register_mac(&priv->dev,
+                                                              port,
+                                                              vp_admin->mac);
+                       if (0 > vp_oper->mac_idx) {
+                               err = vp_oper->mac_idx;
+                               vp_oper->mac_idx = NO_INDX;
+                               mlx4_warn(&priv->dev,
+                                         "No mac resources slave %d, port %d\n",
+                                         slave, port);
+                               return err;
+                       }
+                       mlx4_dbg(&priv->dev, "alloc mac %llx idx %d slave %d port %d\n",
+                                vp_oper->state.mac, vp_oper->mac_idx, slave, port);
+               }
+       }
+       return 0;
+}
+
+static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave)
+{
+       int port;
+       struct mlx4_vport_oper_state *vp_oper;
+       struct mlx4_active_ports actv_ports = mlx4_get_active_ports(
+                       &priv->dev, slave);
+       int min_port = find_first_bit(actv_ports.ports,
+                                     priv->dev.caps.num_ports) + 1;
+       int max_port = min_port - 1 +
+               bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports);
+
+       for (port = min_port; port <= max_port; port++) {
+               if (!test_bit(port - 1, actv_ports.ports))
+                       continue;
+               priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+                       MLX4_VF_SMI_DISABLED;
+               vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+               if (NO_INDX != vp_oper->vlan_idx) {
+                       __mlx4_unregister_vlan(&priv->dev,
+                                              port, vp_oper->state.default_vlan);
+                       vp_oper->vlan_idx = NO_INDX;
+               }
+               if (NO_INDX != vp_oper->mac_idx) {
+                       __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac);
+                       vp_oper->mac_idx = NO_INDX;
+               }
+       }
+}
+
+static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
+                              u16 param, u8 toggle)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+       u32 reply;
+       u8 is_going_down = 0;
+       int i;
+       unsigned long flags;
+
+       slave_state[slave].comm_toggle ^= 1;
+       reply = (u32) slave_state[slave].comm_toggle << 31;
+       if (toggle != slave_state[slave].comm_toggle) {
+               mlx4_warn(dev, "Incorrect toggle %d from slave %d. *** MASTER STATE COMPROMISED ***\n",
+                         toggle, slave);
+               goto reset_slave;
+       }
+       if (cmd == MLX4_COMM_CMD_RESET) {
+               mlx4_warn(dev, "Received reset from slave:%d\n", slave);
+               slave_state[slave].active = false;
+               slave_state[slave].old_vlan_api = false;
+               mlx4_master_deactivate_admin_state(priv, slave);
+               for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) {
+                       slave_state[slave].event_eq[i].eqn = -1;
+                       slave_state[slave].event_eq[i].token = 0;
+               }
+               /* Check if we are in the middle of the FLR process; if so,
+                * return "retry" status to the slave. */
+               if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd)
+                       goto inform_slave_state;
+
+               mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave);
+
+               /* write the version in the event field */
+               reply |= mlx4_comm_get_version();
+
+               goto reset_slave;
+       }
+       /* Command from a slave in the middle of FLR */
+       if (cmd != MLX4_COMM_CMD_RESET &&
+           MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) {
+               mlx4_warn(dev, "slave:%d is trying to run cmd (0x%x) in the middle of FLR\n",
+                         slave, cmd);
+               return;
+       }
+
+       switch (cmd) {
+       case MLX4_COMM_CMD_VHCR0:
+               if (slave_state[slave].last_cmd != MLX4_COMM_CMD_RESET)
+                       goto reset_slave;
+               slave_state[slave].vhcr_dma = ((u64) param) << 48;
+               priv->mfunc.master.slave_state[slave].cookie = 0;
+               break;
+       case MLX4_COMM_CMD_VHCR1:
+               if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR0)
+                       goto reset_slave;
+               slave_state[slave].vhcr_dma |= ((u64) param) << 32;
+               break;
+       case MLX4_COMM_CMD_VHCR2:
+               if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR1)
+                       goto reset_slave;
+               slave_state[slave].vhcr_dma |= ((u64) param) << 16;
+               break;
+       case MLX4_COMM_CMD_VHCR_EN:
+               if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR2)
+                       goto reset_slave;
+               slave_state[slave].vhcr_dma |= param;
+               if (mlx4_master_activate_admin_state(priv, slave))
+                       goto reset_slave;
+               slave_state[slave].active = true;
+               mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave);
+               break;
+       case MLX4_COMM_CMD_VHCR_POST:
+               if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
+                   (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) {
+                       mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n",
+                                 slave, cmd, slave_state[slave].last_cmd);
+                       goto reset_slave;
+               }
+
+               mutex_lock(&priv->cmd.slave_cmd_mutex);
+               if (mlx4_master_process_vhcr(dev, slave, NULL)) {
+                       mlx4_err(dev, "Failed processing vhcr for slave:%d, resetting slave\n",
+                                slave);
+                       mutex_unlock(&priv->cmd.slave_cmd_mutex);
+                       goto reset_slave;
+               }
+               mutex_unlock(&priv->cmd.slave_cmd_mutex);
+               break;
+       default:
+               mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave);
+               goto reset_slave;
+       }
+       spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+       if (!slave_state[slave].is_slave_going_down)
+               slave_state[slave].last_cmd = cmd;
+       else
+               is_going_down = 1;
+       spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+       if (is_going_down) {
+               mlx4_warn(dev, "Slave is going down, aborting command (%d) from slave:%d\n",
+                         cmd, slave);
+               return;
+       }
+       __raw_writel((__force u32) cpu_to_be32(reply),
+                    &priv->mfunc.comm[slave].slave_read);
+       mmiowb();
+
+       return;
+
+reset_slave:
+       /* cleanup any slave resources */
+       if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)
+               mlx4_delete_all_resources_for_slave(dev, slave);
+
+       if (cmd != MLX4_COMM_CMD_RESET) {
+               mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n",
+                         slave, cmd);
+               /* Turn on internal error, letting the slave reset itself
+                * immediately; otherwise it might wait until the command
+                * timeout expires.
+                */
+               reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR);
+       }
+
+       spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+       if (!slave_state[slave].is_slave_going_down)
+               slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
+       spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+       /* With the slave in the middle of FLR, there is no need to clean
+        * resources again. */
+inform_slave_state:
+       memset(&slave_state[slave].event_eq, 0,
+              sizeof(struct mlx4_slave_event_eq_info));
+       __raw_writel((__force u32) cpu_to_be32(reply),
+                    &priv->mfunc.comm[slave].slave_read);
+       wmb();
+}
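+
+/* The VHCR0..VHCR_EN cases above reassemble the slave's 64-bit vhcr DMA
+ * address from four 16-bit comm writes, in strict order:
+ *
+ *     vhcr_dma  = (u64)param << 48;   (MLX4_COMM_CMD_VHCR0)
+ *     vhcr_dma |= (u64)param << 32;   (MLX4_COMM_CMD_VHCR1)
+ *     vhcr_dma |= (u64)param << 16;   (MLX4_COMM_CMD_VHCR2)
+ *     vhcr_dma |= param;              (MLX4_COMM_CMD_VHCR_EN)
+ *
+ * The last_cmd checks enforce that sequence; anything out of order resets
+ * the slave.  The matching slave-side poster is expected to live in the
+ * init path of main.c (mlx4_init_slave() upstream), also part of this
+ * import.
+ */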
+
+/* master command processing */
+void mlx4_master_comm_channel(struct work_struct *work)
+{
+       struct mlx4_mfunc_master_ctx *master =
+               container_of(work,
+                            struct mlx4_mfunc_master_ctx,
+                            comm_work);
+       struct mlx4_mfunc *mfunc =
+               container_of(master, struct mlx4_mfunc, master);
+       struct mlx4_priv *priv =
+               container_of(mfunc, struct mlx4_priv, mfunc);
+       struct mlx4_dev *dev = &priv->dev;
+       __be32 *bit_vec;
+       u32 comm_cmd;
+       u32 vec;
+       int i, j, slave;
+       int toggle;
+       int served = 0;
+       int reported = 0;
+       u32 slt;
+
+       bit_vec = master->comm_arm_bit_vector;
+       for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) {
+               vec = be32_to_cpu(bit_vec[i]);
+               for (j = 0; j < 32; j++) {
+                       if (!(vec & (1 << j)))
+                               continue;
+                       ++reported;
+                       slave = (i * 32) + j;
+                       comm_cmd = swab32(readl(
+                                         &mfunc->comm[slave].slave_write));
+                       slt = swab32(readl(&mfunc->comm[slave].slave_read))
+                                    >> 31;
+                       toggle = comm_cmd >> 31;
+                       if (toggle != slt) {
+                               if (master->slave_state[slave].comm_toggle
+                                   != slt) {
+                                       pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resyncing.\n",
+                                               slave, slt,
+                                               master->slave_state[slave].comm_toggle);
+                                       master->slave_state[slave].comm_toggle =
+                                               slt;
+                               }
+                               mlx4_master_do_cmd(dev, slave,
+                                                  comm_cmd >> 16 & 0xff,
+                                                  comm_cmd & 0xffff, toggle);
+                               ++served;
+                       }
+               }
+       }
+
+       if (reported && reported != served)
+               mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served\n",
+                         reported, served);
+
+       if (mlx4_ARM_COMM_CHANNEL(dev))
+               mlx4_warn(dev, "Failed to arm comm channel events\n");
+}
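+
+/* Each comm-channel slot carries a toggle in bit 31.  A slave posts a
+ * command by writing slave_write with the toggle flipped; the master sees
+ * the slot as pending while slave_write's toggle differs from slave_read's
+ * (slt above), and acknowledges by writing the reply -- carrying the new
+ * toggle -- into slave_read from mlx4_master_do_cmd().  The
+ * reported/served warning flags armed bits whose toggles already matched,
+ * i.e. events with nothing left to do.
+ */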
+
+static int sync_toggles(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       u32 wr_toggle;
+       u32 rd_toggle;
+       unsigned long end;
+
+       wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write));
+       if (wr_toggle == 0xffffffff)
+               end = jiffies + msecs_to_jiffies(30000);
+       else
+               end = jiffies + msecs_to_jiffies(5000);
+
+       while (time_before(jiffies, end)) {
+               rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
+               if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
+                       /* PCI might be offline */
+                       msleep(100);
+                       wr_toggle = swab32(readl(&priv->mfunc.comm->
+                                          slave_write));
+                       continue;
+               }
+
+               if (rd_toggle >> 31 == wr_toggle >> 31) {
+                       priv->cmd.comm_toggle = rd_toggle >> 31;
+                       return 0;
+               }
+
+               cond_resched();
+       }
+
+       /*
+        * We could reach here if, for example, the previous VM using this
+        * function misbehaved and left the channel in an unsynced state.
+        * Fix that here and give this VM a chance to use a properly synced
+        * channel.
+        */
+       mlx4_warn(dev, "recovering from a previously misbehaved VM\n");
+       __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_read);
+       __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_write);
+       priv->cmd.comm_toggle = 0;
+
+       return 0;
+}
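+
+/* An all-ones readl() here usually means the device fell off the PCI bus,
+ * hence the longer 30-second budget and the re-read/msleep loop above.  If
+ * the toggles never converge within the budget, the previous owner is
+ * assumed to have died mid-command, so both sides of the channel are
+ * zeroed to restart the protocol from a known state.
+ */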
+
+int mlx4_multi_func_init(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_slave_state *s_state;
+       int i, j, err, port;
+
+       if (mlx4_is_master(dev))
+               priv->mfunc.comm =
+               ioremap(pci_resource_start(dev->persist->pdev,
+                                          priv->fw.comm_bar) +
+                       priv->fw.comm_base, MLX4_COMM_PAGESIZE);
+       else
+               priv->mfunc.comm =
+               ioremap(pci_resource_start(dev->persist->pdev, 2) +
+                       MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE);
+       if (!priv->mfunc.comm) {
+               mlx4_err(dev, "Couldn't map communication vector\n");
+               goto err_vhcr;
+       }
+
+       if (mlx4_is_master(dev)) {
+               struct mlx4_vf_oper_state *vf_oper;
+               struct mlx4_vf_admin_state *vf_admin;
+
+               priv->mfunc.master.slave_state =
+                       kzalloc(dev->num_slaves *
+                               sizeof(struct mlx4_slave_state), GFP_KERNEL);
+               if (!priv->mfunc.master.slave_state)
+                       goto err_comm;
+
+               priv->mfunc.master.vf_admin =
+                       kzalloc(dev->num_slaves *
+                               sizeof(struct mlx4_vf_admin_state), GFP_KERNEL);
+               if (!priv->mfunc.master.vf_admin)
+                       goto err_comm_admin;
+
+               priv->mfunc.master.vf_oper =
+                       kzalloc(dev->num_slaves *
+                               sizeof(struct mlx4_vf_oper_state), GFP_KERNEL);
+               if (!priv->mfunc.master.vf_oper)
+                       goto err_comm_oper;
+
+               for (i = 0; i < dev->num_slaves; ++i) {
+                       vf_admin = &priv->mfunc.master.vf_admin[i];
+                       vf_oper = &priv->mfunc.master.vf_oper[i];
+                       s_state = &priv->mfunc.master.slave_state[i];
+                       s_state->last_cmd = MLX4_COMM_CMD_RESET;
+                       mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]);
+                       for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j)
+                               s_state->event_eq[j].eqn = -1;
+                       __raw_writel((__force u32) 0,
+                                    &priv->mfunc.comm[i].slave_write);
+                       __raw_writel((__force u32) 0,
+                                    &priv->mfunc.comm[i].slave_read);
+                       mmiowb();
+                       for (port = 1; port <= MLX4_MAX_PORTS; port++) {
+                               struct mlx4_vport_state *admin_vport;
+                               struct mlx4_vport_state *oper_vport;
+
+                               s_state->vlan_filter[port] =
+                                       kzalloc(sizeof(struct mlx4_vlan_fltr),
+                                               GFP_KERNEL);
+                               if (!s_state->vlan_filter[port]) {
+                                       /* free the filters allocated so far */
+                                       while (--port)
+                                               kfree(s_state->vlan_filter[port]);
+                                       goto err_slaves;
+                               }
+
+                               admin_vport = &vf_admin->vport[port];
+                               oper_vport = &vf_oper->vport[port].state;
+                               INIT_LIST_HEAD(&s_state->mcast_filters[port]);
+                               admin_vport->default_vlan = MLX4_VGT;
+                               oper_vport->default_vlan = MLX4_VGT;
+                               admin_vport->qos_vport =
+                                               MLX4_VPP_DEFAULT_VPORT;
+                               oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
+                               vf_oper->vport[port].vlan_idx = NO_INDX;
+                               vf_oper->vport[port].mac_idx = NO_INDX;
+                               mlx4_set_random_admin_guid(dev, i, port);
+                       }
+                       spin_lock_init(&s_state->lock);
+               }
+
+               if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP) {
+                       for (port = 1; port <= dev->caps.num_ports; port++) {
+                               if (mlx4_is_eth(dev, port)) {
+                                       mlx4_set_default_port_qos(dev, port);
+                                       mlx4_allocate_port_vpps(dev, port);
+                               }
+                       }
+               }
+
+               memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
+               priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
+               INIT_WORK(&priv->mfunc.master.comm_work,
+                         mlx4_master_comm_channel);
+               INIT_WORK(&priv->mfunc.master.slave_event_work,
+                         mlx4_gen_slave_eqe);
+               INIT_WORK(&priv->mfunc.master.slave_flr_event_work,
+                         mlx4_master_handle_slave_flr);
+               spin_lock_init(&priv->mfunc.master.slave_state_lock);
+               spin_lock_init(&priv->mfunc.master.slave_eq.event_lock);
+               priv->mfunc.master.comm_wq =
+                       create_singlethread_workqueue("mlx4_comm");
+               if (!priv->mfunc.master.comm_wq)
+                       goto err_slaves;
+
+               if (mlx4_init_resource_tracker(dev))
+                       goto err_thread;
+
+       } else {
+               err = sync_toggles(dev);
+               if (err) {
+                       mlx4_err(dev, "Couldn't sync toggles\n");
+                       goto err_comm;
+               }
+       }
+       return 0;
+
+err_thread:
+       flush_workqueue(priv->mfunc.master.comm_wq);
+       destroy_workqueue(priv->mfunc.master.comm_wq);
+err_slaves:
+       while (i--) {
+               for (port = 1; port <= MLX4_MAX_PORTS; port++)
+                       kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+       }
+       kfree(priv->mfunc.master.vf_oper);
+err_comm_oper:
+       kfree(priv->mfunc.master.vf_admin);
+err_comm_admin:
+       kfree(priv->mfunc.master.slave_state);
+err_comm:
+       iounmap(priv->mfunc.comm);
+err_vhcr:
+       dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
+                         priv->mfunc.vhcr,
+                         priv->mfunc.vhcr_dma);
+       priv->mfunc.vhcr = NULL;
+       return -ENOMEM;
+}
+
+int mlx4_cmd_init(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int flags = 0;
+
+       if (!priv->cmd.initialized) {
+               mutex_init(&priv->cmd.slave_cmd_mutex);
+               sema_init(&priv->cmd.poll_sem, 1);
+               priv->cmd.use_events = 0;
+               priv->cmd.toggle     = 1;
+               priv->cmd.initialized = 1;
+               flags |= MLX4_CMD_CLEANUP_STRUCT;
+       }
+
+       if (!mlx4_is_slave(dev) && !priv->cmd.hcr) {
+               priv->cmd.hcr = ioremap(pci_resource_start(dev->persist->pdev,
+                                       0) + MLX4_HCR_BASE, MLX4_HCR_SIZE);
+               if (!priv->cmd.hcr) {
+                       mlx4_err(dev, "Couldn't map command register\n");
+                       goto err;
+               }
+               flags |= MLX4_CMD_CLEANUP_HCR;
+       }
+
+       if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) {
+               priv->mfunc.vhcr = dma_alloc_coherent(&dev->persist->pdev->dev,
+                                                     PAGE_SIZE,
+                                                     &priv->mfunc.vhcr_dma,
+                                                     GFP_KERNEL);
+               if (!priv->mfunc.vhcr)
+                       goto err;
+
+               flags |= MLX4_CMD_CLEANUP_VHCR;
+       }
+
+       if (!priv->cmd.pool) {
+               priv->cmd.pool = pci_pool_create("mlx4_cmd",
+                                                dev->persist->pdev,
+                                                MLX4_MAILBOX_SIZE,
+                                                MLX4_MAILBOX_SIZE, 0);
+               if (!priv->cmd.pool)
+                       goto err;
+
+               flags |= MLX4_CMD_CLEANUP_POOL;
+       }
+
+       return 0;
+
+err:
+       mlx4_cmd_cleanup(dev, flags);
+       return -ENOMEM;
+}
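+
+/* mlx4_cmd_init() is deliberately re-entrant across device resets: each
+ * resource it creates (command struct, HCR mapping, VHCR page, mailbox
+ * pool) is guarded by a "not already present" check and records an
+ * MLX4_CMD_CLEANUP_* flag, so the error path -- and a later
+ * mlx4_cmd_cleanup() -- tears down only what a given call actually
+ * created.
+ */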
+
+void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int slave;
+       u32 slave_read;
+
+       /* Report an internal error event to all
+        * communication channels.
+        */
+       for (slave = 0; slave < dev->num_slaves; slave++) {
+               slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read));
+               slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR;
+               __raw_writel((__force u32)cpu_to_be32(slave_read),
+                            &priv->mfunc.comm[slave].slave_read);
+               /* Make sure that our comm channel write doesn't
+                * get mixed in with writes from another CPU.
+                */
+               mmiowb();
+       }
+}
+
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int i, port;
+
+       if (mlx4_is_master(dev)) {
+               flush_workqueue(priv->mfunc.master.comm_wq);
+               destroy_workqueue(priv->mfunc.master.comm_wq);
+               for (i = 0; i < dev->num_slaves; i++) {
+                       for (port = 1; port <= MLX4_MAX_PORTS; port++)
+                               kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+               }
+               kfree(priv->mfunc.master.slave_state);
+               kfree(priv->mfunc.master.vf_admin);
+               kfree(priv->mfunc.master.vf_oper);
+               dev->num_slaves = 0;
+       }
+
+       iounmap(priv->mfunc.comm);
+}
+
+void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       if (priv->cmd.pool && (cleanup_mask & MLX4_CMD_CLEANUP_POOL)) {
+               pci_pool_destroy(priv->cmd.pool);
+               priv->cmd.pool = NULL;
+       }
+
+       if (!mlx4_is_slave(dev) && priv->cmd.hcr &&
+           (cleanup_mask & MLX4_CMD_CLEANUP_HCR)) {
+               iounmap(priv->cmd.hcr);
+               priv->cmd.hcr = NULL;
+       }
+       if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr &&
+           (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) {
+               dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
+                                 priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
+               priv->mfunc.vhcr = NULL;
+       }
+       if (priv->cmd.initialized && (cleanup_mask & MLX4_CMD_CLEANUP_STRUCT))
+               priv->cmd.initialized = 0;
+}
+
+/*
+ * Switch to using events to issue FW commands (can only be called
+ * after event queue for command events has been initialized).
+ */
+int mlx4_cmd_use_events(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int i;
+       int err = 0;
+
+       priv->cmd.context = kmalloc(priv->cmd.max_cmds *
+                                  sizeof (struct mlx4_cmd_context),
+                                  GFP_KERNEL);
+       if (!priv->cmd.context)
+               return -ENOMEM;
+
+       for (i = 0; i < priv->cmd.max_cmds; ++i) {
+               priv->cmd.context[i].token = i;
+               priv->cmd.context[i].next  = i + 1;
+               /* To support fatal error flow, initialize all
+                * cmd contexts to allow simulating completions
+                * with complete() at any time.
+                */
+               init_completion(&priv->cmd.context[i].done);
+       }
+
+       priv->cmd.context[priv->cmd.max_cmds - 1].next = -1;
+       priv->cmd.free_head = 0;
+
+       sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds);
+       spin_lock_init(&priv->cmd.context_lock);
+
+       for (priv->cmd.token_mask = 1;
+            priv->cmd.token_mask < priv->cmd.max_cmds;
+            priv->cmd.token_mask <<= 1)
+               ; /* nothing */
+       --priv->cmd.token_mask;
+
+       down(&priv->cmd.poll_sem);
+       priv->cmd.use_events = 1;
+
+       return err;
+}
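+
+/* The token_mask loop above computes the smallest power of two >= max_cmds,
+ * minus one.  With an illustrative max_cmds of 24, token_mask becomes 0x1f:
+ * the low bits of a command token then always identify a context slot,
+ * while (in the upstream scheme) the bits above the mask act as a
+ * generation counter bumped on each reuse, catching stale completions.
+ * Taking poll_sem here parks the polling path until mlx4_cmd_use_polling()
+ * releases it.
+ */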
+
+/*
+ * Switch back to polling (used when shutting down the device)
+ */
+void mlx4_cmd_use_polling(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       int i;
+
+       priv->cmd.use_events = 0;
+
+       for (i = 0; i < priv->cmd.max_cmds; ++i)
+               down(&priv->cmd.event_sem);
+
+       kfree(priv->cmd.context);
+
+       up(&priv->cmd.poll_sem);
+}
+
+struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
+{
+       struct mlx4_cmd_mailbox *mailbox;
+
+       mailbox = kmalloc(sizeof *mailbox, GFP_KERNEL);
+       if (!mailbox)
+               return ERR_PTR(-ENOMEM);
+
+       mailbox->buf = pci_pool_alloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL,
+                                     &mailbox->dma);
+       if (!mailbox->buf) {
+               kfree(mailbox);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE);
+
+       return mailbox;
+}
+EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox);
+
+void mlx4_free_cmd_mailbox(struct mlx4_dev *dev,
+                          struct mlx4_cmd_mailbox *mailbox)
+{
+       if (!mailbox)
+               return;
+
+       pci_pool_free(mlx4_priv(dev)->cmd.pool, mailbox->buf, mailbox->dma);
+       kfree(mailbox);
+}
+EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox);
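+
+/* Typical mailbox usage, as mlx4_cq_modify() below demonstrates:
+ *
+ *     mailbox = mlx4_alloc_cmd_mailbox(dev);
+ *     if (IS_ERR(mailbox))
+ *             return PTR_ERR(mailbox);
+ *     ... fill mailbox->buf, hand mailbox->dma to mlx4_cmd() ...
+ *     mlx4_free_cmd_mailbox(dev, mailbox);
+ *
+ * Note the ERR_PTR convention: the allocator never returns NULL, so
+ * callers must test with IS_ERR(), not a NULL check.
+ */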
+
+u32 mlx4_comm_get_version(void)
+{
+       return ((u32) CMD_CHAN_IF_REV << 8) | (u32) CMD_CHAN_VER;
+}
+
+static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf)
+{
+       if ((vf < 0) || (vf >= dev->persist->num_vfs)) {
+               mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n",
+                        vf, dev->persist->num_vfs);
+               return -EINVAL;
+       }
+
+       return vf + 1;
+}
+
+int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave)
+{
+       if (slave < 1 || slave > dev->persist->num_vfs) {
+               mlx4_err(dev,
+                        "Bad slave number:%d (number of activated slaves: %lu)\n",
+                        slave, dev->num_slaves);
+               return -EINVAL;
+       }
+       return slave - 1;
+}
+
+void mlx4_cmd_wake_completions(struct mlx4_dev *dev)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_cmd_context *context;
+       int i;
+
+       spin_lock(&priv->cmd.context_lock);
+       if (priv->cmd.context) {
+               for (i = 0; i < priv->cmd.max_cmds; ++i) {
+                       context = &priv->cmd.context[i];
+                       context->fw_status = CMD_STAT_INTERNAL_ERR;
+                       context->result    =
+                               mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+                       complete(&context->done);
+               }
+       }
+       spin_unlock(&priv->cmd.context_lock);
+}
+
+struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave)
+{
+       struct mlx4_active_ports actv_ports;
+       int vf;
+
+       bitmap_zero(actv_ports.ports, MLX4_MAX_PORTS);
+
+       if (slave == 0) {
+               bitmap_fill(actv_ports.ports, dev->caps.num_ports);
+               return actv_ports;
+       }
+
+       vf = mlx4_get_vf_indx(dev, slave);
+       if (vf < 0)
+               return actv_ports;
+
+       bitmap_set(actv_ports.ports, dev->dev_vfs[vf].min_port - 1,
+                  min((int)dev->dev_vfs[vf].n_ports,
+                      dev->caps.num_ports));
+
+       return actv_ports;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_active_ports);
+
+int mlx4_slave_convert_port(struct mlx4_dev *dev, int slave, int port)
+{
+       unsigned n;
+       struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave);
+       unsigned m = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+
+       if (port <= 0 || port > m)
+               return -EINVAL;
+
+       n = find_first_bit(actv_ports.ports, dev->caps.num_ports);
+       if (port <= n)
+               port = n + 1;
+
+       return port;
+}
+EXPORT_SYMBOL_GPL(mlx4_slave_convert_port);
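+
+/* Slaves number their ports from 1 regardless of which physical ports they
+ * own.  As an illustrative example: a VF assigned only physical port 2 has
+ * actv_ports = {bit 1}, so m = 1 and n = 1 above, and its port 1 converts
+ * to physical port n + 1 = 2.  mlx4_phys_to_slave_port() below is the
+ * inverse mapping.
+ */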
+
+int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port)
+{
+       struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave);
+       if (test_bit(port - 1, actv_ports.ports))
+               return port -
+                       find_first_bit(actv_ports.ports, dev->caps.num_ports);
+
+       return -1;
+}
+EXPORT_SYMBOL_GPL(mlx4_phys_to_slave_port);
+
+struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev,
+                                                  int port)
+{
+       unsigned i;
+       struct mlx4_slaves_pport slaves_pport;
+
+       bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX);
+
+       if (port <= 0 || port > dev->caps.num_ports)
+               return slaves_pport;
+
+       for (i = 0; i < dev->persist->num_vfs + 1; i++) {
+               struct mlx4_active_ports actv_ports =
+                       mlx4_get_active_ports(dev, i);
+               if (test_bit(port - 1, actv_ports.ports))
+                       set_bit(i, slaves_pport.slaves);
+       }
+
+       return slaves_pport;
+}
+EXPORT_SYMBOL_GPL(mlx4_phys_to_slaves_pport);
+
+struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv(
+               struct mlx4_dev *dev,
+               const struct mlx4_active_ports *crit_ports)
+{
+       unsigned i;
+       struct mlx4_slaves_pport slaves_pport;
+
+       bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX);
+
+       for (i = 0; i < dev->persist->num_vfs + 1; i++) {
+               struct mlx4_active_ports actv_ports =
+                       mlx4_get_active_ports(dev, i);
+               if (bitmap_equal(crit_ports->ports, actv_ports.ports,
+                                dev->caps.num_ports))
+                       set_bit(i, slaves_pport.slaves);
+       }
+
+       return slaves_pport;
+}
+EXPORT_SYMBOL_GPL(mlx4_phys_to_slaves_pport_actv);
+
+static int mlx4_slaves_closest_port(struct mlx4_dev *dev, int slave, int port)
+{
+       struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave);
+       int min_port = find_first_bit(actv_ports.ports, dev->caps.num_ports)
+                       + 1;
+       int max_port = min_port +
+               bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+
+       if (port < min_port)
+               port = min_port;
+       else if (port >= max_port)
+               port = max_port - 1;
+
+       return port;
+}
+
+static int mlx4_set_vport_qos(struct mlx4_priv *priv, int slave, int port,
+                             int max_tx_rate)
+{
+       int i;
+       int err;
+       struct mlx4_qos_manager *port_qos;
+       struct mlx4_dev *dev = &priv->dev;
+       struct mlx4_vport_qos_param vpp_qos[MLX4_NUM_UP];
+
+       port_qos = &priv->mfunc.master.qos_ctl[port];
+       memset(vpp_qos, 0, sizeof(struct mlx4_vport_qos_param) * MLX4_NUM_UP);
+
+       if (slave > port_qos->num_of_qos_vfs) {
+               mlx4_info(dev, "No available VPP resources for this VF\n");
+               return -EINVAL;
+       }
+
+       /* Query the default QoS values from Vport 0 */
+       err = mlx4_SET_VPORT_QOS_get(dev, port, 0, vpp_qos);
+       if (err) {
+               mlx4_info(dev, "Failed to query Vport 0 QoS values\n");
+               return err;
+       }
+
+       for (i = 0; i < MLX4_NUM_UP; i++) {
+               if (test_bit(i, port_qos->priority_bm) && max_tx_rate) {
+                       vpp_qos[i].max_avg_bw = max_tx_rate;
+                       vpp_qos[i].enable = 1;
+               } else {
+                       /* A user-supplied tx_rate of 0 means no rate limit
+                        * is required, so leave max_avg_bw at the value
+                        * queried from Vport 0.
+                        */
+                       vpp_qos[i].enable = 0;
+               }
+       }
+
+       err = mlx4_SET_VPORT_QOS_set(dev, port, slave, vpp_qos);
+       if (err) {
+               mlx4_info(dev, "Failed to set Vport %d QoS values\n", slave);
+               return err;
+       }
+
+       return 0;
+}
+
+static bool mlx4_is_vf_vst_and_prio_qos(struct mlx4_dev *dev, int port,
+                                       struct mlx4_vport_state *vf_admin)
+{
+       struct mlx4_qos_manager *info;
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       if (!mlx4_is_master(dev) ||
+           !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP))
+               return false;
+
+       info = &priv->mfunc.master.qos_ctl[port];
+
+       if (vf_admin->default_vlan != MLX4_VGT &&
+           test_bit(vf_admin->default_qos, info->priority_bm))
+               return true;
+
+       return false;
+}
+
+static bool mlx4_valid_vf_state_change(struct mlx4_dev *dev, int port,
+                                      struct mlx4_vport_state *vf_admin,
+                                      int vlan, int qos)
+{
+       struct mlx4_vport_state dummy_admin = {0};
+
+       if (!mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin) ||
+           !vf_admin->tx_rate)
+               return true;
+
+       dummy_admin.default_qos = qos;
+       dummy_admin.default_vlan = vlan;
+
+       /* The VF wants to move to another VST state that is valid with the
+        * current rate limit: either a different default vlan in VST or
+        * another supported QoS priority.  Otherwise we don't allow the
+        * change while a TX rate is still configured.
+        */
+       if (mlx4_is_vf_vst_and_prio_qos(dev, port, &dummy_admin))
+               return true;
+
+       mlx4_info(dev, "Cannot change VF state to %s while rate is set\n",
+                 (vlan == MLX4_VGT) ? "VGT" : "VST");
+
+       if (vlan != MLX4_VGT)
+               mlx4_info(dev, "VST priority %d not supported for QoS\n", qos);
+
+       mlx4_info(dev, "Please set rate to 0 prior to this VF state change\n");
+
+       return false;
+}
+
+int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_vport_state *s_info;
+       int slave;
+
+       if (!mlx4_is_master(dev))
+               return -EPROTONOSUPPORT;
+
+       slave = mlx4_get_slave_indx(dev, vf);
+       if (slave < 0)
+               return -EINVAL;
+
+       port = mlx4_slaves_closest_port(dev, slave, port);
+       s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+       s_info->mac = mac;
+       mlx4_info(dev, "default mac on vf %d port %d to %llX will take effect only after vf restart\n",
+                 vf, port, s_info->mac);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_mac);
+
+int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_vport_state *vf_admin;
+       int slave;
+
+       if ((!mlx4_is_master(dev)) ||
+           !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VLAN_CONTROL))
+               return -EPROTONOSUPPORT;
+
+       if ((vlan > 4095) || (qos > 7))
+               return -EINVAL;
+
+       slave = mlx4_get_slave_indx(dev, vf);
+       if (slave < 0)
+               return -EINVAL;
+
+       port = mlx4_slaves_closest_port(dev, slave, port);
+       vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+       if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos))
+               return -EPERM;
+
+       if ((0 == vlan) && (0 == qos))
+               vf_admin->default_vlan = MLX4_VGT;
+       else
+               vf_admin->default_vlan = vlan;
+       vf_admin->default_qos = qos;
+
+       /* If a rate was configured prior to VST, we saved the configured
+        * rate in vf_admin->tx_rate; now, if the priority is supported, we
+        * enforce the QoS.
+        */
+       if (mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin) &&
+           vf_admin->tx_rate)
+               vf_admin->qos_vport = slave;
+
+       if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
+               mlx4_info(dev,
+                         "updating vf %d port %d config will take effect on next VF restart\n",
+                         vf, port);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan);
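+
+/* A (vlan, qos) pair of (0, 0) is the "no VST" request, so the admin state
+ * falls back to MLX4_VGT (guest-controlled tagging); any other pair selects
+ * VST with that tag and priority.  The immediate-activate call may
+ * legitimately fail (e.g. UPDATE_QP unsupported), in which case the new
+ * settings stay staged in vf_admin and are applied by
+ * mlx4_master_activate_admin_state() on the next VF start.
+ */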
+
+int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate,
+                    int max_tx_rate)
+{
+       int err;
+       int slave;
+       struct mlx4_vport_state *vf_admin;
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       if (!mlx4_is_master(dev) ||
+           !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP))
+               return -EPROTONOSUPPORT;
+
+       if (min_tx_rate) {
+               mlx4_info(dev, "Minimum BW share not supported\n");
+               return -EPROTONOSUPPORT;
+       }
+
+       slave = mlx4_get_slave_indx(dev, vf);
+       if (slave < 0)
+               return -EINVAL;
+
+       port = mlx4_slaves_closest_port(dev, slave, port);
+       vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+       err = mlx4_set_vport_qos(priv, slave, port, max_tx_rate);
+       if (err) {
+               mlx4_info(dev, "vf %d failed to set rate %d\n", vf,
+                         max_tx_rate);
+               return err;
+       }
+
+       vf_admin->tx_rate = max_tx_rate;
+       /* if VF is not in supported mode (VST with supported prio),
+        * we do not change vport configuration for its QPs, but save
+        * the rate, so it will be enforced when it moves to supported
+        * mode next time.
+        */
+       if (!mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin)) {
+               mlx4_info(dev,
+                         "rate set for VF %d when not in valid state\n", vf);
+
+               if (vf_admin->default_vlan != MLX4_VGT)
+                       mlx4_info(dev, "VST priority not supported by QoS\n");
+               else
+                       mlx4_info(dev, "VF in VGT mode (needed VST)\n");
+
+               mlx4_info(dev,
+                         "rate %d takes effect when the VF moves to a valid state\n",
+                         max_tx_rate);
+               return 0;
+       }
+
+       /* If user sets rate 0 assigning default vport for its QPs */
+       vf_admin->qos_vport = max_tx_rate ? slave : MLX4_VPP_DEFAULT_VPORT;
+
+       if (priv->mfunc.master.slave_state[slave].active &&
+           dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP)
+               mlx4_master_immediate_activate_vlan_qos(priv, slave, port);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_rate);
+
+/* mlx4_get_slave_default_vlan -
+ * returns true if the slave is in VST mode (i.e. has a default vlan);
+ * if so, also returns the vlan & qos through the non-NULL out pointers.
+ */
+bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave,
+                                u16 *vlan, u8 *qos)
+{
+       struct mlx4_vport_oper_state *vp_oper;
+       struct mlx4_priv *priv;
+
+       priv = mlx4_priv(dev);
+       port = mlx4_slaves_closest_port(dev, slave, port);
+       vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+
+       if (MLX4_VGT != vp_oper->state.default_vlan) {
+               if (vlan)
+                       *vlan = vp_oper->state.default_vlan;
+               if (qos)
+                       *qos = vp_oper->state.default_qos;
+               return true;
+       }
+       return false;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_slave_default_vlan);
+
+int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_vport_state *s_info;
+       int slave;
+
+       if ((!mlx4_is_master(dev)) ||
+           !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FSM))
+               return -EPROTONOSUPPORT;
+
+       slave = mlx4_get_slave_indx(dev, vf);
+       if (slave < 0)
+               return -EINVAL;
+
+       port = mlx4_slaves_closest_port(dev, slave, port);
+       s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+       s_info->spoofchk = setting;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_spoofchk);
+
+int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_vport_state *s_info;
+       int slave;
+
+       if (!mlx4_is_master(dev))
+               return -EPROTONOSUPPORT;
+
+       slave = mlx4_get_slave_indx(dev, vf);
+       if (slave < 0)
+               return -EINVAL;
+
+       s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+       ivf->vf = vf;
+
+       /* need to convert it to a func */
+       ivf->mac[0] = ((s_info->mac >> (5*8)) & 0xff);
+       ivf->mac[1] = ((s_info->mac >> (4*8)) & 0xff);
+       ivf->mac[2] = ((s_info->mac >> (3*8)) & 0xff);
+       ivf->mac[3] = ((s_info->mac >> (2*8)) & 0xff);
+       ivf->mac[4] = ((s_info->mac >> (1*8)) & 0xff);
+       ivf->mac[5] = ((s_info->mac)  & 0xff);
+
+       ivf->vlan               = s_info->default_vlan;
+       ivf->qos                = s_info->default_qos;
+
+       if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info))
+               ivf->max_tx_rate = s_info->tx_rate;
+       else
+               ivf->max_tx_rate = 0;
+
+       ivf->min_tx_rate        = 0;
+       ivf->spoofchk           = s_info->spoofchk;
+       ivf->linkstate          = s_info->link_state;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_vf_config);
+
+int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_vport_state *s_info;
+       int slave;
+       u8 link_stat_event;
+
+       slave = mlx4_get_slave_indx(dev, vf);
+       if (slave < 0)
+               return -EINVAL;
+
+       port = mlx4_slaves_closest_port(dev, slave, port);
+       switch (link_state) {
+       case IFLA_VF_LINK_STATE_AUTO:
+               /* get current link state */
+               if (!priv->sense.do_sense_port[port])
+                       link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE;
+               else
+                       link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN;
+               break;
+
+       case IFLA_VF_LINK_STATE_ENABLE:
+               link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE;
+               break;
+
+       case IFLA_VF_LINK_STATE_DISABLE:
+               link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN;
+               break;
+
+       default:
+               mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n",
+                         link_state, slave, port);
+               return -EINVAL;
+       }
+       s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+       s_info->link_state = link_state;
+
+       /* send event */
+       mlx4_gen_port_state_change_eqe(dev, slave, port, link_stat_event);
+
+       if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
+               mlx4_dbg(dev,
+                        "updating vf %d port %d: no link state HW enforcement\n",
+                        vf, port);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state);
+
+int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       if (slave < 1 || slave >= dev->num_slaves ||
+           port < 1 || port > MLX4_MAX_PORTS)
+               return 0;
+
+       return priv->mfunc.master.vf_oper[slave].smi_enabled[port] ==
+               MLX4_VF_SMI_ENABLED;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled);
+
+int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       if (slave == mlx4_master_func_num(dev))
+               return 1;
+
+       if (slave < 1 || slave >= dev->num_slaves ||
+           port < 1 || port > MLX4_MAX_PORTS)
+               return 0;
+
+       return priv->mfunc.master.vf_admin[slave].enable_smi[port] ==
+               MLX4_VF_SMI_ENABLED;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_get_enable_smi_admin);
+
+int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
+                                int enabled)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       if (slave == mlx4_master_func_num(dev))
+               return 0;
+
+       if (slave < 1 || slave >= dev->num_slaves ||
+           port < 1 || port > MLX4_MAX_PORTS ||
+           enabled < 0 || enabled > 1)
+               return -EINVAL;
+
+       priv->mfunc.master.vf_admin[slave].enable_smi[port] = enabled;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_set_enable_smi_admin);
diff --git a/kern/drivers/net/mlx4/cq.c b/kern/drivers/net/mlx4/cq.c
new file mode 100644 (file)
index 0000000..e71f313
--- /dev/null
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hardirq.h>
+#include <linux/export.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/cq.h>
+
+#include "mlx4.h"
+#include "icm.h"
+
+#define MLX4_CQ_STATUS_OK              ( 0 << 28)
+#define MLX4_CQ_STATUS_OVERFLOW                ( 9 << 28)
+#define MLX4_CQ_STATUS_WRITE_FAIL      (10 << 28)
+#define MLX4_CQ_FLAG_CC                        ( 1 << 18)
+#define MLX4_CQ_FLAG_OI                        ( 1 << 17)
+#define MLX4_CQ_STATE_ARMED            ( 9 <<  8)
+#define MLX4_CQ_STATE_ARMED_SOL                ( 6 <<  8)
+#define MLX4_EQ_STATE_FIRED            (10 <<  8)
+
+#define TASKLET_MAX_TIME 2
+#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
+
+void mlx4_cq_tasklet_cb(unsigned long data)
+{
+       unsigned long flags;
+       unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
+       struct mlx4_eq_tasklet *ctx = (struct mlx4_eq_tasklet *)data;
+       struct mlx4_cq *mcq, *temp;
+
+       spin_lock_irqsave(&ctx->lock, flags);
+       list_splice_tail_init(&ctx->list, &ctx->process_list);
+       spin_unlock_irqrestore(&ctx->lock, flags);
+
+       list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) {
+               list_del_init(&mcq->tasklet_ctx.list);
+               mcq->tasklet_ctx.comp(mcq);
+               if (atomic_dec_and_test(&mcq->refcount))
+                       complete(&mcq->free);
+               if (time_after(jiffies, end))
+                       break;
+       }
+
+       if (!list_empty(&ctx->process_list))
+               tasklet_schedule(&ctx->task);
+}
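+
+/* The tasklet drains at most TASKLET_MAX_TIME_JIFFIES (2 ms) of completions
+ * per run and reschedules itself if work remains, rather than starving
+ * other softirqs.  Running the callbacks without ctx->lock held is safe
+ * because each CQ was pinned with a reference when it was queued by
+ * mlx4_add_cq_to_tasklet() below.
+ */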
+
+static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq)
+{
+       unsigned long flags;
+       struct mlx4_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
+
+       spin_lock_irqsave(&tasklet_ctx->lock, flags);
+       /* When migration of CQs between EQs is implemented, note that this
+        * point will need synchronization: while a CQ is being migrated,
+        * completions on the old EQ could still arrive.
+        */
+       if (list_empty_careful(&cq->tasklet_ctx.list)) {
+               atomic_inc(&cq->refcount);
+               list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
+       }
+       spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
+}
+
+void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
+{
+       struct mlx4_cq *cq;
+
+       cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
+                              cqn & (dev->caps.num_cqs - 1));
+       if (!cq) {
+               mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
+               return;
+       }
+
+       ++cq->arm_sn;
+
+       cq->comp(cq);
+}
+
+void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
+{
+       struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
+       struct mlx4_cq *cq;
+
+       spin_lock(&cq_table->lock);
+
+       cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
+       if (cq)
+               atomic_inc(&cq->refcount);
+
+       spin_unlock(&cq_table->lock);
+
+       if (!cq) {
+               mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+               return;
+       }
+
+       cq->event(cq, event_type);
+
+       if (atomic_dec_and_test(&cq->refcount))
+               complete(&cq->free);
+}
+
+static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+                        int cq_num)
+{
+       return mlx4_cmd(dev, mailbox->dma, cq_num, 0,
+                       MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A,
+                       MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+                        int cq_num, u32 opmod)
+{
+       return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_MODIFY_CQ,
+                       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+                        int cq_num)
+{
+       return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+                           cq_num, mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
+                           MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+
+int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
+                  u16 count, u16 period)
+{
+       struct mlx4_cmd_mailbox *mailbox;
+       struct mlx4_cq_context *cq_context;
+       int err;
+
+       mailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(mailbox))
+               return PTR_ERR(mailbox);
+
+       cq_context = mailbox->buf;
+       cq_context->cq_max_count = cpu_to_be16(count);
+       cq_context->cq_period    = cpu_to_be16(period);
+
+       err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 1);
+
+       mlx4_free_cmd_mailbox(dev, mailbox);
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_modify);
+
+int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
+                  int entries, struct mlx4_mtt *mtt)
+{
+       struct mlx4_cmd_mailbox *mailbox;
+       struct mlx4_cq_context *cq_context;
+       u64 mtt_addr;
+       int err;
+
+       mailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(mailbox))
+               return PTR_ERR(mailbox);
+
+       cq_context = mailbox->buf;
+       cq_context->logsize_usrpage = cpu_to_be32(ilog2(entries) << 24);
+       cq_context->log_page_size   = mtt->page_shift - 12;
+       mtt_addr = mlx4_mtt_addr(dev, mtt);
+       cq_context->mtt_base_addr_h = mtt_addr >> 32;
+       cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
+
+       err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 0);
+
+       mlx4_free_cmd_mailbox(dev, mailbox);
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_resize);
+
+int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_cq_table *cq_table = &priv->cq_table;
+       int err;
+
+       *cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
+       if (*cqn == -1)
+               return -ENOMEM;
+
+       err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL);
+       if (err)
+               goto err_out;
+
+       err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL);
+       if (err)
+               goto err_put;
+       return 0;
+
+err_put:
+       mlx4_table_put(dev, &cq_table->table, *cqn);
+
+err_out:
+       mlx4_bitmap_free(&cq_table->bitmap, *cqn, MLX4_NO_RR);
+       return err;
+}
+
+static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
+{
+       u64 out_param;
+       int err;
+
+       if (mlx4_is_mfunc(dev)) {
+               err = mlx4_cmd_imm(dev, 0, &out_param, RES_CQ,
+                                  RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+                                  MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+               if (err)
+                       return err;
+               *cqn = get_param_l(&out_param);
+               return 0;
+       }
+       return __mlx4_cq_alloc_icm(dev, cqn);
+}
+
+void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_cq_table *cq_table = &priv->cq_table;
+
+       mlx4_table_put(dev, &cq_table->cmpt_table, cqn);
+       mlx4_table_put(dev, &cq_table->table, cqn);
+       mlx4_bitmap_free(&cq_table->bitmap, cqn, MLX4_NO_RR);
+}
+
+static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
+{
+       u64 in_param = 0;
+       int err;
+
+       if (mlx4_is_mfunc(dev)) {
+               set_param_l(&in_param, cqn);
+               err = mlx4_cmd(dev, in_param, RES_CQ, RES_OP_RESERVE_AND_MAP,
+                              MLX4_CMD_FREE_RES,
+                              MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+               if (err)
+                       mlx4_warn(dev, "Failed freeing cq:%d\n", cqn);
+       } else
+               __mlx4_cq_free_icm(dev, cqn);
+}
+
+int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
+                 struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec,
+                 struct mlx4_cq *cq, unsigned vector, int collapsed,
+                 int timestamp_en)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_cq_table *cq_table = &priv->cq_table;
+       struct mlx4_cmd_mailbox *mailbox;
+       struct mlx4_cq_context *cq_context;
+       u64 mtt_addr;
+       int err;
+
+       if (vector > dev->caps.num_comp_vectors + dev->caps.comp_pool)
+               return -EINVAL;
+
+       cq->vector = vector;
+
+       err = mlx4_cq_alloc_icm(dev, &cq->cqn);
+       if (err)
+               return err;
+
+       spin_lock_irq(&cq_table->lock);
+       err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
+       spin_unlock_irq(&cq_table->lock);
+       if (err)
+               goto err_icm;
+
+       mailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(mailbox)) {
+               err = PTR_ERR(mailbox);
+               goto err_radix;
+       }
+
+       cq_context = mailbox->buf;
+       cq_context->flags           = cpu_to_be32(!!collapsed << 18);
+       if (timestamp_en)
+               cq_context->flags  |= cpu_to_be32(1 << 19);
+
+       cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
+       cq_context->comp_eqn        = priv->eq_table.eq[vector].eqn;
+       cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
+
+       mtt_addr = mlx4_mtt_addr(dev, mtt);
+       cq_context->mtt_base_addr_h = mtt_addr >> 32;
+       cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
+       cq_context->db_rec_addr     = cpu_to_be64(db_rec);
+
+       err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn);
+       mlx4_free_cmd_mailbox(dev, mailbox);
+       if (err)
+               goto err_radix;
+
+       cq->cons_index = 0;
+       cq->arm_sn     = 1;
+       cq->uar        = uar;
+       atomic_set(&cq->refcount, 1);
+       init_completion(&cq->free);
+       cq->comp = mlx4_add_cq_to_tasklet;
+       cq->tasklet_ctx.priv =
+               &priv->eq_table.eq[cq->vector].tasklet_ctx;
+       INIT_LIST_HEAD(&cq->tasklet_ctx.list);
+
+       cq->irq = priv->eq_table.eq[cq->vector].irq;
+       return 0;
+
+err_radix:
+       spin_lock_irq(&cq_table->lock);
+       radix_tree_delete(&cq_table->tree, cq->cqn);
+       spin_unlock_irq(&cq_table->lock);
+
+err_icm:
+       mlx4_cq_free_icm(dev, cq->cqn);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_alloc);
+
+void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       struct mlx4_cq_table *cq_table = &priv->cq_table;
+       int err;
+
+       err = mlx4_HW2SW_CQ(dev, NULL, cq->cqn);
+       if (err)
+               mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
+
+       synchronize_irq(priv->eq_table.eq[cq->vector].irq);
+
+       spin_lock_irq(&cq_table->lock);
+       radix_tree_delete(&cq_table->tree, cq->cqn);
+       spin_unlock_irq(&cq_table->lock);
+
+       if (atomic_dec_and_test(&cq->refcount))
+               complete(&cq->free);
+       wait_for_completion(&cq->free);
+
+       mlx4_cq_free_icm(dev, cq->cqn);
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_free);
+
+int mlx4_init_cq_table(struct mlx4_dev *dev)
+{
+       struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
+       int err;
+
+       spin_lock_init(&cq_table->lock);
+       INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+       if (mlx4_is_slave(dev))
+               return 0;
+
+       err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs,
+                              dev->caps.num_cqs - 1, dev->caps.reserved_cqs, 0);
+       if (err)
+               return err;
+
+       return 0;
+}
+
+void mlx4_cleanup_cq_table(struct mlx4_dev *dev)
+{
+       if (mlx4_is_slave(dev))
+               return;
+       /* Nothing to do to clean up radix_tree */
+       mlx4_bitmap_cleanup(&mlx4_priv(dev)->cq_table.bitmap);
+}
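The functions exported above form a small lifecycle API: allocate a CQ
backed by an MTT and doorbell record, optionally retune its interrupt
moderation, and free it. A hedged sketch of a consumer, modeled on
mlx4_en_activate_cq() in en_cq.c below; demo_cq_lifecycle() is
hypothetical, and the wqres/uar setup is assumed to come from
mlx4_alloc_hwq_res() and the driver's priv_uar:

    /* Illustrative only; error unwinding kept minimal. */
    static int demo_cq_lifecycle(struct mlx4_dev *dev,
                                 struct mlx4_hwq_resources *wqres,
                                 struct mlx4_uar *uar, int nent,
                                 unsigned vector)
    {
            struct mlx4_cq cq;
            int err;

            err = mlx4_cq_alloc(dev, nent, &wqres->mtt, uar, wqres->db.dma,
                                &cq, vector, /*collapsed*/ 0,
                                /*timestamp_en*/ 0);
            if (err)
                    return err;

            /* e.g. interrupt after 44 completions or a 16 usec period */
            err = mlx4_cq_modify(dev, &cq, 44, 16);

            mlx4_cq_free(dev, &cq); /* HW2SW_CQ + wait for refcount drop */
            return err;
    }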
diff --git a/kern/drivers/net/mlx4/en_clock.c b/kern/drivers/net/mlx4/en_clock.c
new file mode 100644 (file)
index 0000000..8a083d7
--- /dev/null
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx4/device.h>
+#include <linux/clocksource.h>
+
+#include "mlx4_en.h"
+
+/* mlx4_en_read_clock - read raw cycle counter (to be used by time counter)
+ */
+static cycle_t mlx4_en_read_clock(const struct cyclecounter *tc)
+{
+       struct mlx4_en_dev *mdev =
+               container_of(tc, struct mlx4_en_dev, cycles);
+       struct mlx4_dev *dev = mdev->dev;
+
+       return mlx4_read_clock(dev) & tc->mask;
+}
+
+u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe)
+{
+       u64 hi, lo;
+       struct mlx4_ts_cqe *ts_cqe = (struct mlx4_ts_cqe *)cqe;
+
+       lo = (u64)be16_to_cpu(ts_cqe->timestamp_lo);
+       hi = ((u64)be32_to_cpu(ts_cqe->timestamp_hi) + !lo) << 16;
+
+       return hi | lo;
+}
+
+void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
+                           struct skb_shared_hwtstamps *hwts,
+                           u64 timestamp)
+{
+       unsigned long flags;
+       u64 nsec;
+
+       read_lock_irqsave(&mdev->clock_lock, flags);
+       nsec = timecounter_cyc2time(&mdev->clock, timestamp);
+       read_unlock_irqrestore(&mdev->clock_lock, flags);
+
+       memset(hwts, 0, sizeof(struct skb_shared_hwtstamps));
+       hwts->hwtstamp = ns_to_ktime(nsec);
+}
+
+/**
+ * mlx4_en_remove_timestamp - disable PTP device
+ * @mdev: board private structure
+ *
+ * Stop the PTP support.
+ **/
+void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev)
+{
+       if (mdev->ptp_clock) {
+               ptp_clock_unregister(mdev->ptp_clock);
+               mdev->ptp_clock = NULL;
+               mlx4_info(mdev, "removed PHC\n");
+       }
+}
+
+void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev)
+{
+       bool timeout = time_is_before_jiffies(mdev->last_overflow_check +
+                                             mdev->overflow_period);
+       unsigned long flags;
+
+       if (timeout) {
+               write_lock_irqsave(&mdev->clock_lock, flags);
+               timecounter_read(&mdev->clock);
+               write_unlock_irqrestore(&mdev->clock_lock, flags);
+               mdev->last_overflow_check = jiffies;
+       }
+}
+
+/**
+ * mlx4_en_phc_adjfreq - adjust the frequency of the hardware clock
+ * @ptp: ptp clock structure
+ * @delta: Desired frequency change in parts per billion
+ *
+ * Adjust the frequency of the PHC cycle counter by the indicated delta from
+ * the base frequency.
+ **/
+static int mlx4_en_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+       u64 adj;
+       u32 diff, mult;
+       int neg_adj = 0;
+       unsigned long flags;
+       struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+                                               ptp_clock_info);
+
+       if (delta < 0) {
+               neg_adj = 1;
+               delta = -delta;
+       }
+       mult = mdev->nominal_c_mult;
+       adj = mult;
+       adj *= delta;
+       diff = div_u64(adj, 1000000000ULL);
+
+       write_lock_irqsave(&mdev->clock_lock, flags);
+       timecounter_read(&mdev->clock);
+       mdev->cycles.mult = neg_adj ? mult - diff : mult + diff;
+       write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+       return 0;
+}
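To see the granularity of this adjustment, assume the ~427 MHz nominal
mult of about 38370 computed in mlx4_en_init_timestamp() below
(illustrative arithmetic, not driver code):

    request delta = 100000 ppb (100 ppm):
        adj  = 38370 * 100000      = 3,837,000,000
        diff = adj / 1,000,000,000 = 3   (truncated)
        mult = 38370 + 3, an actual speed-up of 3/38370 ~ 78 ppm
    one step of diff changes the clock rate by 1/38370 ~ 26 ppm,
    the effective resolution of this adjfreq implementation.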
+
+/**
+ * mlx4_en_phc_adjtime - Shift the time of the hardware clock
+ * @ptp: ptp clock structure
+ * @delta: Desired change in nanoseconds
+ *
+ * Adjust the timer by resetting the timecounter structure.
+ **/
+static int mlx4_en_phc_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+       struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+                                               ptp_clock_info);
+       unsigned long flags;
+
+       write_lock_irqsave(&mdev->clock_lock, flags);
+       timecounter_adjtime(&mdev->clock, delta);
+       write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+       return 0;
+}
+
+/**
+ * mlx4_en_phc_gettime - Reads the current time from the hardware clock
+ * @ptp: ptp clock structure
+ * @ts: timespec structure to hold the current time value
+ *
+ * Read the timecounter and return the correct value in ns after converting
+ * it into a struct timespec.
+ **/
+static int mlx4_en_phc_gettime(struct ptp_clock_info *ptp,
+                              struct timespec64 *ts)
+{
+       struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+                                               ptp_clock_info);
+       unsigned long flags;
+       u64 ns;
+
+       write_lock_irqsave(&mdev->clock_lock, flags);
+       ns = timecounter_read(&mdev->clock);
+       write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+       *ts = ns_to_timespec64(ns);
+
+       return 0;
+}
+
+/**
+ * mlx4_en_phc_settime - Set the current time on the hardware clock
+ * @ptp: ptp clock structure
+ * @ts: timespec containing the new time for the cycle counter
+ *
+ * Reset the timecounter to use a new base value instead of the kernel
+ * wall timer value.
+ **/
+static int mlx4_en_phc_settime(struct ptp_clock_info *ptp,
+                              const struct timespec64 *ts)
+{
+       struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+                                               ptp_clock_info);
+       u64 ns = timespec64_to_ns(ts);
+       unsigned long flags;
+
+       /* reset the timecounter */
+       write_lock_irqsave(&mdev->clock_lock, flags);
+       timecounter_init(&mdev->clock, &mdev->cycles, ns);
+       write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+       return 0;
+}
+
+/**
+ * mlx4_en_phc_enable - enable or disable an ancillary feature
+ * @ptp: ptp clock structure
+ * @request: Desired resource to enable or disable
+ * @on: Caller passes one to enable or zero to disable
+ *
+ * Enable (or disable) ancillary features of the PHC subsystem.
+ * Currently, no ancillary features are supported.
+ **/
+static int mlx4_en_phc_enable(struct ptp_clock_info __always_unused *ptp,
+                             struct ptp_clock_request __always_unused *request,
+                             int __always_unused on)
+{
+       return -EOPNOTSUPP;
+}
+
+static const struct ptp_clock_info mlx4_en_ptp_clock_info = {
+       .owner          = THIS_MODULE,
+       .max_adj        = 100000000,
+       .n_alarm        = 0,
+       .n_ext_ts       = 0,
+       .n_per_out      = 0,
+       .n_pins         = 0,
+       .pps            = 0,
+       .adjfreq        = mlx4_en_phc_adjfreq,
+       .adjtime        = mlx4_en_phc_adjtime,
+       .gettime64      = mlx4_en_phc_gettime,
+       .settime64      = mlx4_en_phc_settime,
+       .enable         = mlx4_en_phc_enable,
+};
+
+void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
+{
+       struct mlx4_dev *dev = mdev->dev;
+       unsigned long flags;
+       u64 ns, zero = 0;
+
+       rwlock_init(&mdev->clock_lock);
+
+       memset(&mdev->cycles, 0, sizeof(mdev->cycles));
+       mdev->cycles.read = mlx4_en_read_clock;
+       mdev->cycles.mask = CLOCKSOURCE_MASK(48);
+       /* Use a shift to make the cycles-to-ns conversion more accurate.
+        * With the current 427 MHz HW clock and a 48-bit cycle register,
+        * the biggest shift that keeps the u64 math from overflowing
+        * (max_cycles * multiplier < 2^64) is 14.
+        */
+       mdev->cycles.shift = 14;
+       mdev->cycles.mult =
+               clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
+       mdev->nominal_c_mult = mdev->cycles.mult;
+
+       write_lock_irqsave(&mdev->clock_lock, flags);
+       timecounter_init(&mdev->clock, &mdev->cycles,
+                        ktime_to_ns(ktime_get_real()));
+       write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+       /* Calculate the overflow watchdog period (stored in jiffies, since
+        * it is compared against the jiffies counter) that makes sure the
+        * cycle counter is read at least once per wrap-around.
+        */
+       ns = cyclecounter_cyc2ns(&mdev->cycles, mdev->cycles.mask, zero, &zero);
+       do_div(ns, NSEC_PER_SEC / 2 / HZ);
+       mdev->overflow_period = ns;
+
+       /* Configure the PHC */
+       mdev->ptp_clock_info = mlx4_en_ptp_clock_info;
+       snprintf(mdev->ptp_clock_info.name, 16, "mlx4 ptp");
+
+       mdev->ptp_clock = ptp_clock_register(&mdev->ptp_clock_info,
+                                            &mdev->pdev->dev);
+       if (IS_ERR(mdev->ptp_clock)) {
+               mdev->ptp_clock = NULL;
+               mlx4_err(mdev, "ptp_clock_register failed\n");
+       } else {
+               mlx4_info(mdev, "registered PHC clock\n");
+       }
+}
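To make the mult/shift choice concrete, the numbers for a 427 MHz
hca_core_clock work out as follows (illustrative arithmetic):

    mult = clocksource_khz2mult(427000 kHz, shift = 14)
         ~ (10^6 << 14) / 427000 ~ 38370
    ns   = (cycles * mult) >> 14, so one cycle
         ~ 38370 / 16384 = 2.3419 ns   (= 1 / 427 MHz)
    overflow bound: 2^48 * 38370 ~ 1.08e19 < 2^64, while shift = 15
         would double mult and overflow the u64 product;
    wrap-around: 2^48 cycles * 2.3419 ns ~ 6.59e14 ns ~ 7.6 days,
         which bounds how often the overflow watchdog must run.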
diff --git a/kern/drivers/net/mlx4/en_cq.c b/kern/drivers/net/mlx4/en_cq.c
new file mode 100644 (file)
index 0000000..22da4d0
--- /dev/null
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx4/cq.h>
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_en.h"
+
+static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event)
+{
+       return;
+}
+
+int mlx4_en_create_cq(struct mlx4_en_priv *priv,
+                     struct mlx4_en_cq **pcq,
+                     int entries, int ring, enum cq_type mode,
+                     int node)
+{
+       struct mlx4_en_dev *mdev = priv->mdev;
+       struct mlx4_en_cq *cq;
+       int err;
+
+       cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node);
+       if (!cq) {
+               cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+               if (!cq) {
+                       en_err(priv, "Failed to allocate CQ structure\n");
+                       return -ENOMEM;
+               }
+       }
+
+       cq->size = entries;
+       cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
+
+       cq->ring = ring;
+       cq->is_tx = mode;
+
+       /* Allocate HW buffers on provided NUMA node.
+        * dev->numa_node is used in mtt range allocation flow.
+        */
+       set_dev_node(&mdev->dev->persist->pdev->dev, node);
+       err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
+                               cq->buf_size, 2 * PAGE_SIZE);
+       set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
+       if (err)
+               goto err_cq;
+
+       err = mlx4_en_map_buffer(&cq->wqres.buf);
+       if (err)
+               goto err_res;
+
+       cq->buf = (struct mlx4_cqe *)cq->wqres.buf.direct.buf;
+       *pcq = cq;
+
+       return 0;
+
+err_res:
+       mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
+err_cq:
+       kfree(cq);
+       *pcq = NULL;
+       return err;
+}
+
+int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
+                       int cq_idx)
+{
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int err = 0;
+       char name[25];
+       int timestamp_en = 0;
+       struct cpu_rmap *rmap =
+#ifdef CONFIG_RFS_ACCEL
+               priv->dev->rx_cpu_rmap;
+#else
+               NULL;
+#endif
+
+       cq->dev = mdev->pndev[priv->port];
+       cq->mcq.set_ci_db  = cq->wqres.db.db;
+       cq->mcq.arm_db     = cq->wqres.db.db + 1;
+       *cq->mcq.set_ci_db = 0;
+       *cq->mcq.arm_db    = 0;
+       memset(cq->buf, 0, cq->buf_size);
+
+       if (cq->is_tx == RX) {
+               if (mdev->dev->caps.comp_pool) {
+                       if (!cq->vector) {
+                               sprintf(name, "%s-%d", priv->dev->name,
+                                       cq->ring);
+                               /* Set IRQ for specific name (per ring) */
+                               if (mlx4_assign_eq(mdev->dev, name, rmap,
+                                                  &cq->vector)) {
+                                       cq->vector = (cq->ring + 1 + priv->port)
+                                           % mdev->dev->caps.num_comp_vectors;
+                                       mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQs\n",
+                                                 name);
+                               }
+
+                       }
+               } else {
+                       cq->vector = (cq->ring + 1 + priv->port) %
+                               mdev->dev->caps.num_comp_vectors;
+               }
+
+               cq->irq_desc =
+                       irq_to_desc(mlx4_eq_get_irq(mdev->dev,
+                                                   cq->vector));
+       } else {
+               /* For a TX ring, reuse the IRQ assigned to
+                * the RX ring it is paired with. */
+               struct mlx4_en_cq *rx_cq;
+
+               cq_idx = cq_idx % priv->rx_ring_num;
+               rx_cq = priv->rx_cq[cq_idx];
+               cq->vector = rx_cq->vector;
+       }
+
+       if (!cq->is_tx)
+               cq->size = priv->rx_ring[cq->ring]->actual_size;
+
+       if ((cq->is_tx && priv->hwtstamp_config.tx_type) ||
+           (!cq->is_tx && priv->hwtstamp_config.rx_filter))
+               timestamp_en = 1;
+
+       err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt,
+                           &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq,
+                           cq->vector, 0, timestamp_en);
+       if (err)
+               return err;
+
+       cq->mcq.comp  = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq;
+       cq->mcq.event = mlx4_en_cq_event;
+
+       if (cq->is_tx) {
+               netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
+                              NAPI_POLL_WEIGHT);
+       } else {
+               struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
+
+               err = irq_set_affinity_hint(cq->mcq.irq,
+                                           ring->affinity_mask);
+               if (err)
+                       mlx4_warn(mdev, "Failed setting affinity hint\n");
+
+               netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64);
+               napi_hash_add(&cq->napi);
+       }
+
+       napi_enable(&cq->napi);
+
+       return 0;
+}
+
+void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
+{
+       struct mlx4_en_dev *mdev = priv->mdev;
+       struct mlx4_en_cq *cq = *pcq;
+
+       mlx4_en_unmap_buffer(&cq->wqres.buf);
+       mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
+       if (priv->mdev->dev->caps.comp_pool && cq->vector) {
+               mlx4_release_eq(priv->mdev->dev, cq->vector);
+       }
+       cq->vector = 0;
+       cq->buf_size = 0;
+       cq->buf = NULL;
+       kfree(cq);
+       *pcq = NULL;
+}
+
+void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+       napi_disable(&cq->napi);
+       if (!cq->is_tx) {
+               napi_hash_del(&cq->napi);
+               synchronize_rcu();
+               irq_set_affinity_hint(cq->mcq.irq, NULL);
+       }
+       netif_napi_del(&cq->napi);
+
+       mlx4_cq_free(priv->mdev->dev, &cq->mcq);
+}
+
+/* Set rx cq moderation parameters */
+int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+       return mlx4_cq_modify(priv->mdev->dev, &cq->mcq,
+                             cq->moder_cnt, cq->moder_time);
+}
+
+int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+       mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map,
+                   &priv->mdev->uar_lock);
+
+       return 0;
+}
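The en-level helpers above compose into a fixed per-ring lifecycle. A
hedged sketch of the calling order, as the port start/stop paths in
en_netdev.c use it (error handling elided; priv, entries and ring are
assumed set up):

    struct mlx4_en_cq *cq;

    mlx4_en_create_cq(priv, &cq, entries, ring, RX, NUMA_NO_NODE);
    mlx4_en_activate_cq(priv, cq, ring); /* alloc HW CQ, enable NAPI */
    mlx4_en_set_cq_moder(priv, cq);      /* push moder_cnt/moder_time */
    mlx4_en_arm_cq(priv, cq);            /* request the next event IRQ */
    /* ... traffic runs; the NAPI poller re-arms the CQ as needed ... */
    mlx4_en_deactivate_cq(priv, cq);     /* NAPI off, free the HW CQ */
    mlx4_en_destroy_cq(priv, &cq);       /* unmap and free the buffers */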
diff --git a/kern/drivers/net/mlx4/en_dcb_nl.c b/kern/drivers/net/mlx4/en_dcb_nl.c
new file mode 100644 (file)
index 0000000..f01918c
--- /dev/null
@@ -0,0 +1,486 @@
+/*
+ * Copyright (c) 2011 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/dcbnl.h>
+#include <linux/math64.h>
+
+#include "mlx4_en.h"
+#include "fw_qos.h"
+
+/* Definitions for QCN
+ */
+
+struct mlx4_congestion_control_mb_prio_802_1_qau_params {
+       __be32 modify_enable_high;
+       __be32 modify_enable_low;
+       __be32 reserved1;
+       __be32 extended_enable;
+       __be32 rppp_max_rps;
+       __be32 rpg_time_reset;
+       __be32 rpg_byte_reset;
+       __be32 rpg_threshold;
+       __be32 rpg_max_rate;
+       __be32 rpg_ai_rate;
+       __be32 rpg_hai_rate;
+       __be32 rpg_gd;
+       __be32 rpg_min_dec_fac;
+       __be32 rpg_min_rate;
+       __be32 max_time_rise;
+       __be32 max_byte_rise;
+       __be32 max_qdelta;
+       __be32 min_qoffset;
+       __be32 gd_coefficient;
+       __be32 reserved2[5];
+       __be32 cp_sample_base;
+       __be32 reserved3[39];
+};
+
+struct mlx4_congestion_control_mb_prio_802_1_qau_statistics {
+       __be64 rppp_rp_centiseconds;
+       __be32 reserved1;
+       __be32 ignored_cnm;
+       __be32 rppp_created_rps;
+       __be32 estimated_total_rate;
+       __be32 max_active_rate_limiter_index;
+       __be32 dropped_cnms_busy_fw;
+       __be32 reserved2;
+       __be32 cnms_handled_successfully;
+       __be32 min_total_limiters_rate;
+       __be32 max_total_limiters_rate;
+       __be32 reserved3[4];
+};
+
+static int mlx4_en_dcbnl_ieee_getets(struct net_device *dev,
+                                  struct ieee_ets *ets)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct ieee_ets *my_ets = &priv->ets;
+
+       /* No IEEE ETS settings available */
+       if (!my_ets)
+               return -EINVAL;
+
+       ets->ets_cap = IEEE_8021QAZ_MAX_TCS;
+       ets->cbs = my_ets->cbs;
+       memcpy(ets->tc_tx_bw, my_ets->tc_tx_bw, sizeof(ets->tc_tx_bw));
+       memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa));
+       memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc));
+
+       return 0;
+}
+
+static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets)
+{
+       int i;
+       int total_ets_bw = 0;
+       int has_ets_tc = 0;
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               if (ets->prio_tc[i] >= MLX4_EN_NUM_UP) {
+                       en_err(priv, "Bad priority in UP <=> TC mapping. TC: %d, UP: %d\n",
+                                       i, ets->prio_tc[i]);
+                       return -EINVAL;
+               }
+
+               switch (ets->tc_tsa[i]) {
+               case IEEE_8021QAZ_TSA_STRICT:
+                       break;
+               case IEEE_8021QAZ_TSA_ETS:
+                       has_ets_tc = 1;
+                       total_ets_bw += ets->tc_tx_bw[i];
+                       break;
+               default:
+                       en_err(priv, "TC[%d]: Not supported TSA: %d\n",
+                                       i, ets->tc_tsa[i]);
+                       return -ENOTSUPP;
+               }
+       }
+
+       if (has_ets_tc && total_ets_bw != MLX4_EN_BW_MAX) {
+               en_err(priv, "Bad ETS BW sum: %d. Should be exactly 100%%\n",
+                               total_ets_bw);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int mlx4_en_config_port_scheduler(struct mlx4_en_priv *priv,
+               struct ieee_ets *ets, u16 *ratelimit)
+{
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int num_strict = 0;
+       int i;
+       __u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS] = { 0 };
+       __u8 pg[IEEE_8021QAZ_MAX_TCS] = { 0 };
+
+       ets = ets ?: &priv->ets;
+       ratelimit = ratelimit ?: priv->maxrate;
+
+       /* higher TC means higher priority => lower pg */
+       for (i = IEEE_8021QAZ_MAX_TCS - 1; i >= 0; i--) {
+               switch (ets->tc_tsa[i]) {
+               case IEEE_8021QAZ_TSA_STRICT:
+                       pg[i] = num_strict++;
+                       tc_tx_bw[i] = MLX4_EN_BW_MAX;
+                       break;
+               case IEEE_8021QAZ_TSA_ETS:
+                       pg[i] = MLX4_EN_TC_ETS;
+                       tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX4_EN_BW_MIN;
+                       break;
+               }
+       }
+
+       return mlx4_SET_PORT_SCHEDULER(mdev->dev, priv->port, tc_tx_bw, pg,
+                       ratelimit);
+}
+
+static int
+mlx4_en_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int err;
+
+       err = mlx4_en_ets_validate(priv, ets);
+       if (err)
+               return err;
+
+       err = mlx4_SET_PORT_PRIO2TC(mdev->dev, priv->port, ets->prio_tc);
+       if (err)
+               return err;
+
+       err = mlx4_en_config_port_scheduler(priv, ets, NULL);
+       if (err)
+               return err;
+
+       memcpy(&priv->ets, ets, sizeof(priv->ets));
+
+       return 0;
+}
+
+static int mlx4_en_dcbnl_ieee_getpfc(struct net_device *dev,
+               struct ieee_pfc *pfc)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
+       pfc->pfc_en = priv->prof->tx_ppp;
+
+       return 0;
+}
+
+static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev,
+               struct ieee_pfc *pfc)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_port_profile *prof = priv->prof;
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int err;
+
+       en_dbg(DRV, priv, "cap: 0x%x en: 0x%x mbc: 0x%x delay: %d\n",
+                       pfc->pfc_cap,
+                       pfc->pfc_en,
+                       pfc->mbc,
+                       pfc->delay);
+
+       prof->rx_pause = !pfc->pfc_en;
+       prof->tx_pause = !pfc->pfc_en;
+       prof->rx_ppp = pfc->pfc_en;
+       prof->tx_ppp = pfc->pfc_en;
+
+       err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+                                   priv->rx_skb_size + ETH_FCS_LEN,
+                                   prof->tx_pause,
+                                   prof->tx_ppp,
+                                   prof->rx_pause,
+                                   prof->rx_ppp);
+       if (err)
+               en_err(priv, "Failed setting pause params\n");
+       else
+               mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap,
+                                               prof->rx_ppp, prof->rx_pause,
+                                               prof->tx_ppp, prof->tx_pause);
+
+       return err;
+}
+
+static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev)
+{
+       return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+}
+
+static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode)
+{
+       if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+           (mode & DCB_CAP_DCBX_VER_CEE) ||
+           !(mode & DCB_CAP_DCBX_VER_IEEE) ||
+           !(mode & DCB_CAP_DCBX_HOST))
+               return 1;
+
+       return 0;
+}
+
+#define MLX4_RATELIMIT_UNITS_IN_KB 100000 /* rate-limit HW unit in Kbps */
+static int mlx4_en_dcbnl_ieee_getmaxrate(struct net_device *dev,
+                                  struct ieee_maxrate *maxrate)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       int i;
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+               maxrate->tc_maxrate[i] =
+                       priv->maxrate[i] * MLX4_RATELIMIT_UNITS_IN_KB;
+
+       return 0;
+}
+
+static int mlx4_en_dcbnl_ieee_setmaxrate(struct net_device *dev,
+               struct ieee_maxrate *maxrate)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       u16 tmp[IEEE_8021QAZ_MAX_TCS];
+       int i, err;
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               /* Convert from Kbps into HW units, rounding result up.
+                * Setting to 0, means unlimited BW.
+                */
+               tmp[i] = div_u64(maxrate->tc_maxrate[i] +
+                                MLX4_RATELIMIT_UNITS_IN_KB - 1,
+                                MLX4_RATELIMIT_UNITS_IN_KB);
+       }
+
+       err = mlx4_en_config_port_scheduler(priv, NULL, tmp);
+       if (err)
+               return err;
+
+       memcpy(priv->maxrate, tmp, sizeof(priv->maxrate));
+
+       return 0;
+}
+
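The round-up conversion in mlx4_en_dcbnl_ieee_setmaxrate() above means,
for example (illustrative arithmetic, in units of
MLX4_RATELIMIT_UNITS_IN_KB = 100000 Kbps):

    tc_maxrate = 2,500,000 Kbps (2.5 Gbps):
        tmp = (2500000 + 100000 - 1) / 100000 = 25 units = 2.5 Gbps
    tc_maxrate = 50,000 Kbps:
        tmp = (50000 + 99999) / 100000 = 1 unit, i.e. any non-zero
        request below 100 Mbps rounds up to the 100 Mbps granularity
    tc_maxrate = 0 stays 0, meaning unlimited.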
+#define RPG_ENABLE_BIT 31
+#define CN_TAG_BIT     30
+
+static int mlx4_en_dcbnl_ieee_getqcn(struct net_device *dev,
+                                    struct ieee_qcn *qcn)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_congestion_control_mb_prio_802_1_qau_params *hw_qcn;
+       struct mlx4_cmd_mailbox *mailbox_out = NULL;
+       u64 mailbox_in_dma = 0;
+       u32 inmod = 0;
+       int i, err;
+
+       if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QCN))
+               return -EOPNOTSUPP;
+
+       mailbox_out = mlx4_alloc_cmd_mailbox(priv->mdev->dev);
+       if (IS_ERR(mailbox_out))
+               return -ENOMEM;
+       hw_qcn =
+       (struct mlx4_congestion_control_mb_prio_802_1_qau_params *)
+       mailbox_out->buf;
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               inmod = priv->port | ((1 << i) << 8) |
+                        (MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT << 16);
+               err = mlx4_cmd_box(priv->mdev->dev, mailbox_in_dma,
+                                  mailbox_out->dma,
+                                  inmod, MLX4_CONGESTION_CONTROL_GET_PARAMS,
+                                  MLX4_CMD_CONGESTION_CTRL_OPCODE,
+                                  MLX4_CMD_TIME_CLASS_C,
+                                  MLX4_CMD_NATIVE);
+               if (err) {
+                       mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out);
+                       return err;
+               }
+
+               qcn->rpg_enable[i] =
+                       be32_to_cpu(hw_qcn->extended_enable) >> RPG_ENABLE_BIT;
+               qcn->rppp_max_rps[i] =
+                       be32_to_cpu(hw_qcn->rppp_max_rps);
+               qcn->rpg_time_reset[i] =
+                       be32_to_cpu(hw_qcn->rpg_time_reset);
+               qcn->rpg_byte_reset[i] =
+                       be32_to_cpu(hw_qcn->rpg_byte_reset);
+               qcn->rpg_threshold[i] =
+                       be32_to_cpu(hw_qcn->rpg_threshold);
+               qcn->rpg_max_rate[i] =
+                       be32_to_cpu(hw_qcn->rpg_max_rate);
+               qcn->rpg_ai_rate[i] =
+                       be32_to_cpu(hw_qcn->rpg_ai_rate);
+               qcn->rpg_hai_rate[i] =
+                       be32_to_cpu(hw_qcn->rpg_hai_rate);
+               qcn->rpg_gd[i] =
+                       be32_to_cpu(hw_qcn->rpg_gd);
+               qcn->rpg_min_dec_fac[i] =
+                       be32_to_cpu(hw_qcn->rpg_min_dec_fac);
+               qcn->rpg_min_rate[i] =
+                       be32_to_cpu(hw_qcn->rpg_min_rate);
+               qcn->cndd_state_machine[i] =
+                       priv->cndd_state[i];
+       }
+       mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out);
+       return 0;
+}
+
+static int mlx4_en_dcbnl_ieee_setqcn(struct net_device *dev,
+                                    struct ieee_qcn *qcn)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_congestion_control_mb_prio_802_1_qau_params *hw_qcn;
+       struct mlx4_cmd_mailbox *mailbox_in = NULL;
+       u64 mailbox_in_dma = 0;
+       u32 inmod = 0;
+       int i, err;
+#define MODIFY_ENABLE_HIGH_MASK 0xc0000000
+#define MODIFY_ENABLE_LOW_MASK 0xffc00000
+
+       if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QCN))
+               return -EOPNOTSUPP;
+
+       mailbox_in = mlx4_alloc_cmd_mailbox(priv->mdev->dev);
+       if (IS_ERR(mailbox_in))
+               return -ENOMEM;
+
+       mailbox_in_dma = mailbox_in->dma;
+       hw_qcn =
+       (struct mlx4_congestion_control_mb_prio_802_1_qau_params *)mailbox_in->buf;
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               inmod = priv->port | ((1 << i) << 8) |
+                        (MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT << 16);
+
+               /* Before updating any QCN parameter, its modify-enable
+                * bit must be set to 1.
+                */
+
+               hw_qcn->modify_enable_high = cpu_to_be32(
+                                               MODIFY_ENABLE_HIGH_MASK);
+               hw_qcn->modify_enable_low = cpu_to_be32(MODIFY_ENABLE_LOW_MASK);
+
+               hw_qcn->extended_enable = cpu_to_be32(qcn->rpg_enable[i] << RPG_ENABLE_BIT);
+               hw_qcn->rppp_max_rps = cpu_to_be32(qcn->rppp_max_rps[i]);
+               hw_qcn->rpg_time_reset = cpu_to_be32(qcn->rpg_time_reset[i]);
+               hw_qcn->rpg_byte_reset = cpu_to_be32(qcn->rpg_byte_reset[i]);
+               hw_qcn->rpg_threshold = cpu_to_be32(qcn->rpg_threshold[i]);
+               hw_qcn->rpg_max_rate = cpu_to_be32(qcn->rpg_max_rate[i]);
+               hw_qcn->rpg_ai_rate = cpu_to_be32(qcn->rpg_ai_rate[i]);
+               hw_qcn->rpg_hai_rate = cpu_to_be32(qcn->rpg_hai_rate[i]);
+               hw_qcn->rpg_gd = cpu_to_be32(qcn->rpg_gd[i]);
+               hw_qcn->rpg_min_dec_fac = cpu_to_be32(qcn->rpg_min_dec_fac[i]);
+               hw_qcn->rpg_min_rate = cpu_to_be32(qcn->rpg_min_rate[i]);
+               priv->cndd_state[i] = qcn->cndd_state_machine[i];
+               if (qcn->cndd_state_machine[i] == DCB_CNDD_INTERIOR_READY)
+                       hw_qcn->extended_enable |= cpu_to_be32(1 << CN_TAG_BIT);
+
+               err = mlx4_cmd(priv->mdev->dev, mailbox_in_dma, inmod,
+                              MLX4_CONGESTION_CONTROL_SET_PARAMS,
+                              MLX4_CMD_CONGESTION_CTRL_OPCODE,
+                              MLX4_CMD_TIME_CLASS_C,
+                              MLX4_CMD_NATIVE);
+               if (err) {
+                       mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_in);
+                       return err;
+               }
+       }
+       mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_in);
+       return 0;
+}
+
+static int mlx4_en_dcbnl_ieee_getqcnstats(struct net_device *dev,
+                                         struct ieee_qcn_stats *qcn_stats)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_congestion_control_mb_prio_802_1_qau_statistics *hw_qcn_stats;
+       struct mlx4_cmd_mailbox *mailbox_out = NULL;
+       u64 mailbox_in_dma = 0;
+       u32 inmod = 0;
+       int i, err;
+
+       if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QCN))
+               return -EOPNOTSUPP;
+
+       mailbox_out = mlx4_alloc_cmd_mailbox(priv->mdev->dev);
+       if (IS_ERR(mailbox_out))
+               return -ENOMEM;
+
+       hw_qcn_stats =
+       (struct mlx4_congestion_control_mb_prio_802_1_qau_statistics *)
+       mailbox_out->buf;
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               inmod = priv->port | ((1 << i) << 8) |
+                        (MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT << 16);
+               err = mlx4_cmd_box(priv->mdev->dev, mailbox_in_dma,
+                                  mailbox_out->dma, inmod,
+                                  MLX4_CONGESTION_CONTROL_GET_STATISTICS,
+                                  MLX4_CMD_CONGESTION_CTRL_OPCODE,
+                                  MLX4_CMD_TIME_CLASS_C,
+                                  MLX4_CMD_NATIVE);
+               if (err) {
+                       mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out);
+                       return err;
+               }
+               qcn_stats->rppp_rp_centiseconds[i] =
+                       be64_to_cpu(hw_qcn_stats->rppp_rp_centiseconds);
+               qcn_stats->rppp_created_rps[i] =
+                       be32_to_cpu(hw_qcn_stats->rppp_created_rps);
+       }
+       mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out);
+       return 0;
+}
+
+const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops = {
+       .ieee_getets    = mlx4_en_dcbnl_ieee_getets,
+       .ieee_setets    = mlx4_en_dcbnl_ieee_setets,
+       .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate,
+       .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate,
+       .ieee_getpfc    = mlx4_en_dcbnl_ieee_getpfc,
+       .ieee_setpfc    = mlx4_en_dcbnl_ieee_setpfc,
+
+       .getdcbx        = mlx4_en_dcbnl_getdcbx,
+       .setdcbx        = mlx4_en_dcbnl_setdcbx,
+       .ieee_getqcn    = mlx4_en_dcbnl_ieee_getqcn,
+       .ieee_setqcn    = mlx4_en_dcbnl_ieee_setqcn,
+       .ieee_getqcnstats = mlx4_en_dcbnl_ieee_getqcnstats,
+};
+
+const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops = {
+       .ieee_getpfc    = mlx4_en_dcbnl_ieee_getpfc,
+       .ieee_setpfc    = mlx4_en_dcbnl_ieee_setpfc,
+
+       .getdcbx        = mlx4_en_dcbnl_getdcbx,
+       .setdcbx        = mlx4_en_dcbnl_setdcbx,
+};
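For reference, a configuration that passes mlx4_en_ets_validate(): every
user priority maps to a TC below MLX4_EN_NUM_UP, each TC is strict or
ETS, and the ETS bandwidth shares sum to exactly MLX4_EN_BW_MAX (100).
A hypothetical split with two strict-priority TCs:

    struct ieee_ets ets = {
            .tc_tsa   = { [0 ... 5] = IEEE_8021QAZ_TSA_ETS,
                          [6 ... 7] = IEEE_8021QAZ_TSA_STRICT },
            .tc_tx_bw = { 10, 10, 10, 10, 30, 30, 0, 0 }, /* sums to 100 */
            .prio_tc  = { 0, 1, 2, 3, 4, 5, 6, 7 },       /* UP i -> TC i */
    };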
diff --git a/kern/drivers/net/mlx4/en_ethtool.c b/kern/drivers/net/mlx4/en_ethtool.c
new file mode 100644 (file)
index 0000000..a2ddf3d
--- /dev/null
@@ -0,0 +1,2011 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/device.h>
+#include <linux/in.h>
+#include <net/ip.h>
+#include <linux/bitmap.h>
+
+#include "mlx4_en.h"
+#include "en_port.h"
+
+#define EN_ETHTOOL_QP_ATTACH (1ull << 63)
+#define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff)
+#define EN_ETHTOOL_WORD_MASK  cpu_to_be32(0xffffffff)
+
+static int mlx4_en_moderation_update(struct mlx4_en_priv *priv)
+{
+       int i;
+       int err = 0;
+
+       for (i = 0; i < priv->tx_ring_num; i++) {
+               priv->tx_cq[i]->moder_cnt = priv->tx_frames;
+               priv->tx_cq[i]->moder_time = priv->tx_usecs;
+               if (priv->port_up) {
+                       err = mlx4_en_set_cq_moder(priv, priv->tx_cq[i]);
+                       if (err)
+                               return err;
+               }
+       }
+
+       if (priv->adaptive_rx_coal)
+               return 0;
+
+       for (i = 0; i < priv->rx_ring_num; i++) {
+               priv->rx_cq[i]->moder_cnt = priv->rx_frames;
+               priv->rx_cq[i]->moder_time = priv->rx_usecs;
+               priv->last_moder_time[i] = MLX4_EN_AUTO_CONF;
+               if (priv->port_up) {
+                       err = mlx4_en_set_cq_moder(priv, priv->rx_cq[i]);
+                       if (err)
+                               return err;
+               }
+       }
+
+       return err;
+}
+
+static void
+mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+
+       strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
+       strlcpy(drvinfo->version, DRV_VERSION " (" DRV_RELDATE ")",
+               sizeof(drvinfo->version));
+       snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+               "%d.%d.%d",
+               (u16) (mdev->dev->caps.fw_ver >> 32),
+               (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff),
+               (u16) (mdev->dev->caps.fw_ver & 0xffff));
+       strlcpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev),
+               sizeof(drvinfo->bus_info));
+       drvinfo->n_stats = 0;
+       drvinfo->regdump_len = 0;
+       drvinfo->eedump_len = 0;
+}
+
+static const char mlx4_en_priv_flags[][ETH_GSTRING_LEN] = {
+       "blueflame",
+};
+
+static const char main_strings[][ETH_GSTRING_LEN] = {
+       /* main statistics */
+       "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
+       "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
+       "rx_length_errors", "rx_over_errors", "rx_crc_errors",
+       "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors",
+       "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors",
+       "tx_heartbeat_errors", "tx_window_errors",
+
+       /* port statistics */
+       "tso_packets",
+       "xmit_more",
+       "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed",
+       "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload",
+
+       /* priority flow control statistics rx */
+       "rx_pause_prio_0", "rx_pause_duration_prio_0",
+       "rx_pause_transition_prio_0",
+       "rx_pause_prio_1", "rx_pause_duration_prio_1",
+       "rx_pause_transition_prio_1",
+       "rx_pause_prio_2", "rx_pause_duration_prio_2",
+       "rx_pause_transition_prio_2",
+       "rx_pause_prio_3", "rx_pause_duration_prio_3",
+       "rx_pause_transition_prio_3",
+       "rx_pause_prio_4", "rx_pause_duration_prio_4",
+       "rx_pause_transition_prio_4",
+       "rx_pause_prio_5", "rx_pause_duration_prio_5",
+       "rx_pause_transition_prio_5",
+       "rx_pause_prio_6", "rx_pause_duration_prio_6",
+       "rx_pause_transition_prio_6",
+       "rx_pause_prio_7", "rx_pause_duration_prio_7",
+       "rx_pause_transition_prio_7",
+
+       /* flow control statistics rx */
+       "rx_pause", "rx_pause_duration", "rx_pause_transition",
+
+       /* priority flow control statistics tx */
+       "tx_pause_prio_0", "tx_pause_duration_prio_0",
+       "tx_pause_transition_prio_0",
+       "tx_pause_prio_1", "tx_pause_duration_prio_1",
+       "tx_pause_transition_prio_1",
+       "tx_pause_prio_2", "tx_pause_duration_prio_2",
+       "tx_pause_transition_prio_2",
+       "tx_pause_prio_3", "tx_pause_duration_prio_3",
+       "tx_pause_transition_prio_3",
+       "tx_pause_prio_4", "tx_pause_duration_prio_4",
+       "tx_pause_transition_prio_4",
+       "tx_pause_prio_5", "tx_pause_duration_prio_5",
+       "tx_pause_transition_prio_5",
+       "tx_pause_prio_6", "tx_pause_duration_prio_6",
+       "tx_pause_transition_prio_6",
+       "tx_pause_prio_7", "tx_pause_duration_prio_7",
+       "tx_pause_transition_prio_7",
+
+       /* flow control statistics tx */
+       "tx_pause", "tx_pause_duration", "tx_pause_transition",
+
+       /* packet statistics */
+       "rx_multicast_packets",
+       "rx_broadcast_packets",
+       "rx_jabbers",
+       "rx_in_range_length_error",
+       "rx_out_range_length_error",
+       "tx_multicast_packets",
+       "tx_broadcast_packets",
+       "rx_prio_0_packets", "rx_prio_0_bytes",
+       "rx_prio_1_packets", "rx_prio_1_bytes",
+       "rx_prio_2_packets", "rx_prio_2_bytes",
+       "rx_prio_3_packets", "rx_prio_3_bytes",
+       "rx_prio_4_packets", "rx_prio_4_bytes",
+       "rx_prio_5_packets", "rx_prio_5_bytes",
+       "rx_prio_6_packets", "rx_prio_6_bytes",
+       "rx_prio_7_packets", "rx_prio_7_bytes",
+       "rx_novlan_packets", "rx_novlan_bytes",
+       "tx_prio_0_packets", "tx_prio_0_bytes",
+       "tx_prio_1_packets", "tx_prio_1_bytes",
+       "tx_prio_2_packets", "tx_prio_2_bytes",
+       "tx_prio_3_packets", "tx_prio_3_bytes",
+       "tx_prio_4_packets", "tx_prio_4_bytes",
+       "tx_prio_5_packets", "tx_prio_5_bytes",
+       "tx_prio_6_packets", "tx_prio_6_bytes",
+       "tx_prio_7_packets", "tx_prio_7_bytes",
+       "tx_novlan_packets", "tx_novlan_bytes",
+};
+
+static const char mlx4_en_test_names[][ETH_GSTRING_LEN] = {
+       "Interrupt Test",
+       "Link Test",
+       "Speed Test",
+       "Register Test",
+       "Loopback Test",
+};
+
+static u32 mlx4_en_get_msglevel(struct net_device *dev)
+{
+       return ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable;
+}
+
+static void mlx4_en_set_msglevel(struct net_device *dev, u32 val)
+{
+       ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable = val;
+}
+
+static void mlx4_en_get_wol(struct net_device *netdev,
+                           struct ethtool_wolinfo *wol)
+{
+       struct mlx4_en_priv *priv = netdev_priv(netdev);
+       int err = 0;
+       u64 config = 0;
+       u64 mask;
+
+       if ((priv->port < 1) || (priv->port > 2)) {
+               en_err(priv, "Failed to get WoL information\n");
+               return;
+       }
+
+       mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 :
+               MLX4_DEV_CAP_FLAG_WOL_PORT2;
+
+       if (!(priv->mdev->dev->caps.flags & mask)) {
+               wol->supported = 0;
+               wol->wolopts = 0;
+               return;
+       }
+
+       err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
+       if (err) {
+               en_err(priv, "Failed to get WoL information\n");
+               return;
+       }
+
+       if (config & MLX4_EN_WOL_MAGIC)
+               wol->supported = WAKE_MAGIC;
+       else
+               wol->supported = 0;
+
+       if (config & MLX4_EN_WOL_ENABLED)
+               wol->wolopts = WAKE_MAGIC;
+       else
+               wol->wolopts = 0;
+}
+
+static int mlx4_en_set_wol(struct net_device *netdev,
+                           struct ethtool_wolinfo *wol)
+{
+       struct mlx4_en_priv *priv = netdev_priv(netdev);
+       u64 config = 0;
+       int err = 0;
+       u64 mask;
+
+       if ((priv->port < 1) || (priv->port > 2))
+               return -EOPNOTSUPP;
+
+       mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 :
+               MLX4_DEV_CAP_FLAG_WOL_PORT2;
+
+       if (!(priv->mdev->dev->caps.flags & mask))
+               return -EOPNOTSUPP;
+
+       if (wol->supported & ~WAKE_MAGIC)
+               return -EINVAL;
+
+       err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
+       if (err) {
+               en_err(priv, "Failed to get WoL info, unable to modify\n");
+               return err;
+       }
+
+       if (wol->wolopts & WAKE_MAGIC) {
+               config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED |
+                               MLX4_EN_WOL_MAGIC;
+       } else {
+               config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC);
+               config |= MLX4_EN_WOL_DO_MODIFY;
+       }
+
+       err = mlx4_wol_write(priv->mdev->dev, config, priv->port);
+       if (err)
+               en_err(priv, "Failed to set WoL information\n");
+
+       return err;
+}
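These two hooks back the standard ethtool Wake-on-LAN controls, so from
userspace (assuming the interface is named eth0):

    ethtool -s eth0 wol g    # enable magic-packet wake (WAKE_MAGIC)
    ethtool -s eth0 wol d    # disable wake-on-LAN
    ethtool eth0             # "Supports Wake-on: g" reflects get_wol()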
+
+struct bitmap_iterator {
+       unsigned long *stats_bitmap;
+       unsigned int count;
+       unsigned int iterator;
+       bool advance_array; /* if clear, treat every bit as set */
+};
+
+static inline void bitmap_iterator_init(struct bitmap_iterator *h,
+                                       unsigned long *stats_bitmap,
+                                       int count)
+{
+       h->iterator = 0;
+       h->advance_array = !bitmap_empty(stats_bitmap, count);
+       h->count = h->advance_array ? bitmap_weight(stats_bitmap, count)
+               : count;
+       h->stats_bitmap = stats_bitmap;
+}
+
+static inline int bitmap_iterator_test(struct bitmap_iterator *h)
+{
+       return !h->advance_array ? 1 : test_bit(h->iterator, h->stats_bitmap);
+}
+
+static inline int bitmap_iterator_inc(struct bitmap_iterator *h)
+{
+       return h->iterator++;
+}
+
+static inline unsigned int
+bitmap_iterator_count(struct bitmap_iterator *h)
+{
+       return h->count;
+}
+
+static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct bitmap_iterator it;
+
+       bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS);
+
+       switch (sset) {
+       case ETH_SS_STATS:
+               return bitmap_iterator_count(&it) +
+                       (priv->tx_ring_num * 2) +
+#ifdef CONFIG_NET_RX_BUSY_POLL
+                       (priv->rx_ring_num * 5);
+#else
+                       (priv->rx_ring_num * 2);
+#endif
+       case ETH_SS_TEST:
+               return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags
+                                       & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2;
+       case ETH_SS_PRIV_FLAGS:
+               return ARRAY_SIZE(mlx4_en_priv_flags);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
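+/* Fill the stats buffer under stats_lock; the order here must match the
+ * order of the names emitted by mlx4_en_get_strings().
+ */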
+static void mlx4_en_get_ethtool_stats(struct net_device *dev,
+               struct ethtool_stats *stats, uint64_t *data)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       int index = 0;
+       int i;
+       struct bitmap_iterator it;
+
+       bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS);
+
+       spin_lock_bh(&priv->stats_lock);
+
+       for (i = 0; i < NUM_MAIN_STATS; i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] = ((unsigned long *)&priv->stats)[i];
+
+       for (i = 0; i < NUM_PORT_STATS; i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] = ((unsigned long *)&priv->port_stats)[i];
+
+       for (i = 0; i < NUM_FLOW_PRIORITY_STATS_RX;
+            i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] =
+                               ((u64 *)&priv->rx_priority_flowstats)[i];
+
+       for (i = 0; i < NUM_FLOW_STATS_RX; i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] = ((u64 *)&priv->rx_flowstats)[i];
+
+       for (i = 0; i < NUM_FLOW_PRIORITY_STATS_TX;
+            i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] =
+                               ((u64 *)&priv->tx_priority_flowstats)[i];
+
+       for (i = 0; i < NUM_FLOW_STATS_TX; i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] = ((u64 *)&priv->tx_flowstats)[i];
+
+       for (i = 0; i < NUM_PKT_STATS; i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] = ((unsigned long *)&priv->pkstats)[i];
+
+       for (i = 0; i < priv->tx_ring_num; i++) {
+               data[index++] = priv->tx_ring[i]->packets;
+               data[index++] = priv->tx_ring[i]->bytes;
+       }
+       for (i = 0; i < priv->rx_ring_num; i++) {
+               data[index++] = priv->rx_ring[i]->packets;
+               data[index++] = priv->rx_ring[i]->bytes;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+               data[index++] = priv->rx_ring[i]->yields;
+               data[index++] = priv->rx_ring[i]->misses;
+               data[index++] = priv->rx_ring[i]->cleaned;
+#endif
+       }
+       spin_unlock_bh(&priv->stats_lock);
+}
+
+static void mlx4_en_self_test(struct net_device *dev,
+                             struct ethtool_test *etest, u64 *buf)
+{
+       mlx4_en_ex_selftest(dev, &etest->flags, buf);
+}
+
+static void mlx4_en_get_strings(struct net_device *dev,
+                               uint32_t stringset, uint8_t *data)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       int index = 0;
+       int i, strings = 0;
+       struct bitmap_iterator it;
+
+       bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS);
+
+       switch (stringset) {
+       case ETH_SS_TEST:
+               for (i = 0; i < MLX4_EN_NUM_SELF_TEST - 2; i++)
+                       strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]);
+               if (priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK)
+                       for (; i < MLX4_EN_NUM_SELF_TEST; i++)
+                               strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]);
+               break;
+
+       case ETH_SS_STATS:
+               /* Add main counters */
+               for (i = 0; i < NUM_MAIN_STATS; i++, strings++,
+                    bitmap_iterator_inc(&it))
+                       if (bitmap_iterator_test(&it))
+                               strcpy(data + (index++) * ETH_GSTRING_LEN,
+                                      main_strings[strings]);
+
+               for (i = 0; i < NUM_PORT_STATS; i++, strings++,
+                    bitmap_iterator_inc(&it))
+                       if (bitmap_iterator_test(&it))
+                               strcpy(data + (index++) * ETH_GSTRING_LEN,
+                                      main_strings[strings]);
+
+               for (i = 0; i < NUM_FLOW_STATS; i++, strings++,
+                    bitmap_iterator_inc(&it))
+                       if (bitmap_iterator_test(&it))
+                               strcpy(data + (index++) * ETH_GSTRING_LEN,
+                                      main_strings[strings]);
+
+               for (i = 0; i < NUM_PKT_STATS; i++, strings++,
+                    bitmap_iterator_inc(&it))
+                       if (bitmap_iterator_test(&it))
+                               strcpy(data + (index++) * ETH_GSTRING_LEN,
+                                      main_strings[strings]);
+
+               for (i = 0; i < priv->tx_ring_num; i++) {
+                       sprintf(data + (index++) * ETH_GSTRING_LEN,
+                               "tx%d_packets", i);
+                       sprintf(data + (index++) * ETH_GSTRING_LEN,
+                               "tx%d_bytes", i);
+               }
+               for (i = 0; i < priv->rx_ring_num; i++) {
+                       sprintf(data + (index++) * ETH_GSTRING_LEN,
+                               "rx%d_packets", i);
+                       sprintf(data + (index++) * ETH_GSTRING_LEN,
+                               "rx%d_bytes", i);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+                       sprintf(data + (index++) * ETH_GSTRING_LEN,
+                               "rx%d_napi_yield", i);
+                       sprintf(data + (index++) * ETH_GSTRING_LEN,
+                               "rx%d_misses", i);
+                       sprintf(data + (index++) * ETH_GSTRING_LEN,
+                               "rx%d_cleaned", i);
+#endif
+               }
+               break;
+       case ETH_SS_PRIV_FLAGS:
+               for (i = 0; i < ARRAY_SIZE(mlx4_en_priv_flags); i++)
+                       strcpy(data + i * ETH_GSTRING_LEN,
+                              mlx4_en_priv_flags[i]);
+               break;
+       }
+}
+
+static u32 mlx4_en_autoneg_get(struct net_device *dev)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       u32 autoneg = AUTONEG_DISABLE;
+
+       if ((mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP) &&
+           (priv->port_state.flags & MLX4_EN_PORT_ANE))
+               autoneg = AUTONEG_ENABLE;
+
+       return autoneg;
+}
+
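+/* Map the PTYS capability bits onto the ethtool port-type SUPPORTED_* flags */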
+static u32 ptys_get_supported_port(struct mlx4_ptys_reg *ptys_reg)
+{
+       u32 eth_proto = be32_to_cpu(ptys_reg->eth_proto_cap);
+
+       if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_T)
+                        | MLX4_PROT_MASK(MLX4_1000BASE_T)
+                        | MLX4_PROT_MASK(MLX4_100BASE_TX))) {
+               return SUPPORTED_TP;
+       }
+
+       if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_CR)
+                        | MLX4_PROT_MASK(MLX4_10GBASE_SR)
+                        | MLX4_PROT_MASK(MLX4_56GBASE_SR4)
+                        | MLX4_PROT_MASK(MLX4_40GBASE_CR4)
+                        | MLX4_PROT_MASK(MLX4_40GBASE_SR4)
+                        | MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII))) {
+               return SUPPORTED_FIBRE;
+       }
+
+       if (eth_proto & (MLX4_PROT_MASK(MLX4_56GBASE_KR4)
+                        | MLX4_PROT_MASK(MLX4_40GBASE_KR4)
+                        | MLX4_PROT_MASK(MLX4_20GBASE_KR2)
+                        | MLX4_PROT_MASK(MLX4_10GBASE_KR)
+                        | MLX4_PROT_MASK(MLX4_10GBASE_KX4)
+                        | MLX4_PROT_MASK(MLX4_1000BASE_KX))) {
+               return SUPPORTED_Backplane;
+       }
+       return 0;
+}
+
+static u32 ptys_get_active_port(struct mlx4_ptys_reg *ptys_reg)
+{
+       u32 eth_proto = be32_to_cpu(ptys_reg->eth_proto_oper);
+
+       if (!eth_proto) /* link down */
+               eth_proto = be32_to_cpu(ptys_reg->eth_proto_cap);
+
+       if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_T)
+                        | MLX4_PROT_MASK(MLX4_1000BASE_T)
+                        | MLX4_PROT_MASK(MLX4_100BASE_TX))) {
+               return PORT_TP;
+       }
+
+       if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_SR)
+                        | MLX4_PROT_MASK(MLX4_56GBASE_SR4)
+                        | MLX4_PROT_MASK(MLX4_40GBASE_SR4)
+                        | MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII))) {
+               return PORT_FIBRE;
+       }
+
+       if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_CR)
+                        | MLX4_PROT_MASK(MLX4_56GBASE_CR4)
+                        | MLX4_PROT_MASK(MLX4_40GBASE_CR4))) {
+               return PORT_DA;
+       }
+
+       if (eth_proto & (MLX4_PROT_MASK(MLX4_56GBASE_KR4)
+                        | MLX4_PROT_MASK(MLX4_40GBASE_KR4)
+                        | MLX4_PROT_MASK(MLX4_20GBASE_KR2)
+                        | MLX4_PROT_MASK(MLX4_10GBASE_KR)
+                        | MLX4_PROT_MASK(MLX4_10GBASE_KX4)
+                        | MLX4_PROT_MASK(MLX4_1000BASE_KX))) {
+               return PORT_NONE;
+       }
+       return PORT_OTHER;
+}
+
+#define MLX4_LINK_MODES_SZ \
+       (FIELD_SIZEOF(struct mlx4_ptys_reg, eth_proto_cap) * 8)
+
+enum ethtool_report {
+       SUPPORTED = 0,
+       ADVERTISED = 1,
+       SPEED = 2
+};
+
+/* Translates mlx4 link mode to equivalent ethtool Link modes/speed */
+static u32 ptys2ethtool_map[MLX4_LINK_MODES_SZ][3] = {
+       [MLX4_100BASE_TX] = {
+               SUPPORTED_100baseT_Full,
+               ADVERTISED_100baseT_Full,
+               SPEED_100
+               },
+
+       [MLX4_1000BASE_T] = {
+               SUPPORTED_1000baseT_Full,
+               ADVERTISED_1000baseT_Full,
+               SPEED_1000
+               },
+       [MLX4_1000BASE_CX_SGMII] = {
+               SUPPORTED_1000baseKX_Full,
+               ADVERTISED_1000baseKX_Full,
+               SPEED_1000
+               },
+       [MLX4_1000BASE_KX] = {
+               SUPPORTED_1000baseKX_Full,
+               ADVERTISED_1000baseKX_Full,
+               SPEED_1000
+               },
+
+       [MLX4_10GBASE_T] = {
+               SUPPORTED_10000baseT_Full,
+               ADVERTISED_10000baseT_Full,
+               SPEED_10000
+               },
+       [MLX4_10GBASE_CX4] = {
+               SUPPORTED_10000baseKX4_Full,
+               ADVERTISED_10000baseKX4_Full,
+               SPEED_10000
+               },
+       [MLX4_10GBASE_KX4] = {
+               SUPPORTED_10000baseKX4_Full,
+               ADVERTISED_10000baseKX4_Full,
+               SPEED_10000
+               },
+       [MLX4_10GBASE_KR] = {
+               SUPPORTED_10000baseKR_Full,
+               ADVERTISED_10000baseKR_Full,
+               SPEED_10000
+               },
+       [MLX4_10GBASE_CR] = {
+               SUPPORTED_10000baseKR_Full,
+               ADVERTISED_10000baseKR_Full,
+               SPEED_10000
+               },
+       [MLX4_10GBASE_SR] = {
+               SUPPORTED_10000baseKR_Full,
+               ADVERTISED_10000baseKR_Full,
+               SPEED_10000
+               },
+
+       [MLX4_20GBASE_KR2] = {
+               SUPPORTED_20000baseMLD2_Full | SUPPORTED_20000baseKR2_Full,
+               ADVERTISED_20000baseMLD2_Full | ADVERTISED_20000baseKR2_Full,
+               SPEED_20000
+               },
+
+       [MLX4_40GBASE_CR4] = {
+               SUPPORTED_40000baseCR4_Full,
+               ADVERTISED_40000baseCR4_Full,
+               SPEED_40000
+               },
+       [MLX4_40GBASE_KR4] = {
+               SUPPORTED_40000baseKR4_Full,
+               ADVERTISED_40000baseKR4_Full,
+               SPEED_40000
+               },
+       [MLX4_40GBASE_SR4] = {
+               SUPPORTED_40000baseSR4_Full,
+               ADVERTISED_40000baseSR4_Full,
+               SPEED_40000
+               },
+
+       [MLX4_56GBASE_KR4] = {
+               SUPPORTED_56000baseKR4_Full,
+               ADVERTISED_56000baseKR4_Full,
+               SPEED_56000
+               },
+       [MLX4_56GBASE_CR4] = {
+               SUPPORTED_56000baseCR4_Full,
+               ADVERTISED_56000baseCR4_Full,
+               SPEED_56000
+               },
+       [MLX4_56GBASE_SR4] = {
+               SUPPORTED_56000baseSR4_Full,
+               ADVERTISED_56000baseSR4_Full,
+               SPEED_56000
+               },
+};
+
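+/* OR together the ethtool bits for every mlx4 link mode set in eth_proto */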
+static u32 ptys2ethtool_link_modes(u32 eth_proto, enum ethtool_report report)
+{
+       int i;
+       u32 link_modes = 0;
+
+       for (i = 0; i < MLX4_LINK_MODES_SZ; i++) {
+               if (eth_proto & MLX4_PROT_MASK(i))
+                       link_modes |= ptys2ethtool_map[i][report];
+       }
+       return link_modes;
+}
+
+static u32 ethtool2ptys_link_modes(u32 link_modes, enum ethtool_report report)
+{
+       int i;
+       u32 ptys_modes = 0;
+
+       for (i = 0; i < MLX4_LINK_MODES_SZ; i++) {
+               if (ptys2ethtool_map[i][report] & link_modes)
+                       ptys_modes |= 1 << i;
+       }
+       return ptys_modes;
+}
+
+/* Convert actual speed (SPEED_XXX) to ptys link modes */
+static u32 speed2ptys_link_modes(u32 speed)
+{
+       int i;
+       u32 ptys_modes = 0;
+
+       for (i = 0; i < MLX4_LINK_MODES_SZ; i++) {
+               if (ptys2ethtool_map[i][SPEED] == speed)
+                       ptys_modes |= 1 << i;
+       }
+       return ptys_modes;
+}
+
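+/* Query the PTYS register and translate its fields into an ethtool_cmd */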
+static int ethtool_get_ptys_settings(struct net_device *dev,
+                                    struct ethtool_cmd *cmd)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_ptys_reg ptys_reg;
+       u32 eth_proto;
+       int ret;
+
+       memset(&ptys_reg, 0, sizeof(ptys_reg));
+       ptys_reg.local_port = priv->port;
+       ptys_reg.proto_mask = MLX4_PTYS_EN;
+       ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev,
+                                  MLX4_ACCESS_REG_QUERY, &ptys_reg);
+       if (ret) {
+               en_warn(priv, "Failed to run mlx4_ACCESS_PTYS_REG status(%x)",
+                       ret);
+               return ret;
+       }
+       en_dbg(DRV, priv, "ptys_reg.proto_mask       %x\n",
+              ptys_reg.proto_mask);
+       en_dbg(DRV, priv, "ptys_reg.eth_proto_cap    %x\n",
+              be32_to_cpu(ptys_reg.eth_proto_cap));
+       en_dbg(DRV, priv, "ptys_reg.eth_proto_admin  %x\n",
+              be32_to_cpu(ptys_reg.eth_proto_admin));
+       en_dbg(DRV, priv, "ptys_reg.eth_proto_oper   %x\n",
+              be32_to_cpu(ptys_reg.eth_proto_oper));
+       en_dbg(DRV, priv, "ptys_reg.eth_proto_lp_adv %x\n",
+              be32_to_cpu(ptys_reg.eth_proto_lp_adv));
+
+       cmd->supported = 0;
+       cmd->advertising = 0;
+
+       cmd->supported |= ptys_get_supported_port(&ptys_reg);
+
+       eth_proto = be32_to_cpu(ptys_reg.eth_proto_cap);
+       cmd->supported |= ptys2ethtool_link_modes(eth_proto, SUPPORTED);
+
+       eth_proto = be32_to_cpu(ptys_reg.eth_proto_admin);
+       cmd->advertising |= ptys2ethtool_link_modes(eth_proto, ADVERTISED);
+
+       cmd->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+       cmd->advertising |= (priv->prof->tx_pause) ? ADVERTISED_Pause : 0;
+
+       cmd->advertising |= (priv->prof->tx_pause ^ priv->prof->rx_pause) ?
+               ADVERTISED_Asym_Pause : 0;
+
+       cmd->port = ptys_get_active_port(&ptys_reg);
+       cmd->transceiver = (SUPPORTED_TP & cmd->supported) ?
+               XCVR_EXTERNAL : XCVR_INTERNAL;
+
+       if (mlx4_en_autoneg_get(dev)) {
+               cmd->supported |= SUPPORTED_Autoneg;
+               cmd->advertising |= ADVERTISED_Autoneg;
+       }
+
+       cmd->autoneg = (priv->port_state.flags & MLX4_EN_PORT_ANC) ?
+               AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+       eth_proto = be32_to_cpu(ptys_reg.eth_proto_lp_adv);
+       cmd->lp_advertising = ptys2ethtool_link_modes(eth_proto, ADVERTISED);
+
+       cmd->lp_advertising |= (priv->port_state.flags & MLX4_EN_PORT_ANC) ?
+                       ADVERTISED_Autoneg : 0;
+
+       cmd->phy_address = 0;
+       cmd->mdio_support = 0;
+       cmd->maxtxpkt = 0;
+       cmd->maxrxpkt = 0;
+       cmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+       cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+
+       return ret;
+}
+
+static void ethtool_get_default_settings(struct net_device *dev,
+                                        struct ethtool_cmd *cmd)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       int trans_type;
+
+       cmd->autoneg = AUTONEG_DISABLE;
+       cmd->supported = SUPPORTED_10000baseT_Full;
+       cmd->advertising = ADVERTISED_10000baseT_Full;
+       trans_type = priv->port_state.transceiver;
+
+       if (trans_type > 0 && trans_type <= 0xC) {
+               cmd->port = PORT_FIBRE;
+               cmd->transceiver = XCVR_EXTERNAL;
+               cmd->supported |= SUPPORTED_FIBRE;
+               cmd->advertising |= ADVERTISED_FIBRE;
+       } else if (trans_type == 0x80 || trans_type == 0) {
+               cmd->port = PORT_TP;
+               cmd->transceiver = XCVR_INTERNAL;
+               cmd->supported |= SUPPORTED_TP;
+               cmd->advertising |= ADVERTISED_TP;
+       } else {
+               cmd->port = -1;
+               cmd->transceiver = -1;
+       }
+}
+
+static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       int ret = -EINVAL;
+
+       if (mlx4_en_QUERY_PORT(priv->mdev, priv->port))
+               return -ENOMEM;
+
+       en_dbg(DRV, priv, "query port state.flags ANC(%x) ANE(%x)\n",
+              priv->port_state.flags & MLX4_EN_PORT_ANC,
+              priv->port_state.flags & MLX4_EN_PORT_ANE);
+
+       if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL)
+               ret = ethtool_get_ptys_settings(dev, cmd);
+       if (ret) /* ETH PROT CTRL is not supported or PTYS CMD failed */
+               ethtool_get_default_settings(dev, cmd);
+
+       if (netif_carrier_ok(dev)) {
+               ethtool_cmd_speed_set(cmd, priv->port_state.link_speed);
+               cmd->duplex = DUPLEX_FULL;
+       } else {
+               ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+               cmd->duplex = DUPLEX_UNKNOWN;
+       }
+       return 0;
+}
+
+/* Calculate PTYS admin field according to the ethtool speed (SPEED_XXX) */
+static __be32 speed_set_ptys_admin(struct mlx4_en_priv *priv, u32 speed,
+                                  __be32 proto_cap)
+{
+       __be32 proto_admin = 0;
+
+       if (!speed) { /* Speed = 0 ==> Reset Link modes */
+               proto_admin = proto_cap;
+               en_info(priv, "Speed was set to 0, Reset advertised Link Modes to default (%x)\n",
+                       be32_to_cpu(proto_cap));
+       } else {
+               u32 ptys_link_modes = speed2ptys_link_modes(speed);
+
+               proto_admin = cpu_to_be32(ptys_link_modes) & proto_cap;
+               en_info(priv, "Setting Speed to %d\n", speed);
+       }
+       return proto_admin;
+}
+
+static int mlx4_en_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_ptys_reg ptys_reg;
+       __be32 proto_admin;
+       int ret;
+
+       u32 ptys_adv = ethtool2ptys_link_modes(cmd->advertising, ADVERTISED);
+       int speed = ethtool_cmd_speed(cmd);
+
+       en_dbg(DRV, priv, "Set Speed=%d adv=0x%x autoneg=%d duplex=%d\n",
+              speed, cmd->advertising, cmd->autoneg, cmd->duplex);
+
+       if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL) ||
+           (cmd->duplex == DUPLEX_HALF))
+               return -EINVAL;
+
+       memset(&ptys_reg, 0, sizeof(ptys_reg));
+       ptys_reg.local_port = priv->port;
+       ptys_reg.proto_mask = MLX4_PTYS_EN;
+       ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev,
+                                  MLX4_ACCESS_REG_QUERY, &ptys_reg);
+       if (ret) {
+               en_warn(priv, "Failed to QUERY mlx4_ACCESS_PTYS_REG status(%x)\n",
+                       ret);
+               return 0;
+       }
+
+       proto_admin = cmd->autoneg == AUTONEG_ENABLE ?
+               cpu_to_be32(ptys_adv) :
+               speed_set_ptys_admin(priv, speed,
+                                    ptys_reg.eth_proto_cap);
+
+       proto_admin &= ptys_reg.eth_proto_cap;
+       if (!proto_admin) {
+               en_warn(priv, "Not supported link mode(s) requested, check supported link modes.\n");
+               return -EINVAL; /* nothing to change due to bad input */
+       }
+
+       if (proto_admin == ptys_reg.eth_proto_admin)
+               return 0; /* Nothing to change */
+
+       en_dbg(DRV, priv, "mlx4_ACCESS_PTYS_REG SET: ptys_reg.eth_proto_admin = 0x%x\n",
+              be32_to_cpu(proto_admin));
+
+       ptys_reg.eth_proto_admin = proto_admin;
+       ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev, MLX4_ACCESS_REG_WRITE,
+                                  &ptys_reg);
+       if (ret) {
+               en_warn(priv, "Failed to write mlx4_ACCESS_PTYS_REG eth_proto_admin(0x%x) status(0x%x)",
+                       be32_to_cpu(ptys_reg.eth_proto_admin), ret);
+               return ret;
+       }
+
+       mutex_lock(&priv->mdev->state_lock);
+       if (priv->port_up) {
+               en_warn(priv, "Port link mode changed, restarting port...\n");
+               mlx4_en_stop_port(dev, 1);
+               if (mlx4_en_start_port(dev))
+                       en_err(priv, "Failed restarting port %d\n", priv->port);
+       }
+       mutex_unlock(&priv->mdev->state_lock);
+       return 0;
+}
+
+static int mlx4_en_get_coalesce(struct net_device *dev,
+                             struct ethtool_coalesce *coal)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       coal->tx_coalesce_usecs = priv->tx_usecs;
+       coal->tx_max_coalesced_frames = priv->tx_frames;
+       coal->tx_max_coalesced_frames_irq = priv->tx_work_limit;
+
+       coal->rx_coalesce_usecs = priv->rx_usecs;
+       coal->rx_max_coalesced_frames = priv->rx_frames;
+
+       coal->pkt_rate_low = priv->pkt_rate_low;
+       coal->rx_coalesce_usecs_low = priv->rx_usecs_low;
+       coal->pkt_rate_high = priv->pkt_rate_high;
+       coal->rx_coalesce_usecs_high = priv->rx_usecs_high;
+       coal->rate_sample_interval = priv->sample_interval;
+       coal->use_adaptive_rx_coalesce = priv->adaptive_rx_coal;
+
+       return 0;
+}
+
+static int mlx4_en_set_coalesce(struct net_device *dev,
+                             struct ethtool_coalesce *coal)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       if (!coal->tx_max_coalesced_frames_irq)
+               return -EINVAL;
+
+       priv->rx_frames = (coal->rx_max_coalesced_frames ==
+                          MLX4_EN_AUTO_CONF) ?
+                               MLX4_EN_RX_COAL_TARGET :
+                               coal->rx_max_coalesced_frames;
+       priv->rx_usecs = (coal->rx_coalesce_usecs ==
+                         MLX4_EN_AUTO_CONF) ?
+                               MLX4_EN_RX_COAL_TIME :
+                               coal->rx_coalesce_usecs;
+
+       /* Setting TX coalescing parameters */
+       if (coal->tx_coalesce_usecs != priv->tx_usecs ||
+           coal->tx_max_coalesced_frames != priv->tx_frames) {
+               priv->tx_usecs = coal->tx_coalesce_usecs;
+               priv->tx_frames = coal->tx_max_coalesced_frames;
+       }
+
+       /* Set adaptive coalescing params */
+       priv->pkt_rate_low = coal->pkt_rate_low;
+       priv->rx_usecs_low = coal->rx_coalesce_usecs_low;
+       priv->pkt_rate_high = coal->pkt_rate_high;
+       priv->rx_usecs_high = coal->rx_coalesce_usecs_high;
+       priv->sample_interval = coal->rate_sample_interval;
+       priv->adaptive_rx_coal = coal->use_adaptive_rx_coalesce;
+       priv->tx_work_limit = coal->tx_max_coalesced_frames_irq;
+
+       return mlx4_en_moderation_update(priv);
+}
+
+static int mlx4_en_set_pauseparam(struct net_device *dev,
+                               struct ethtool_pauseparam *pause)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int err;
+
+       if (pause->autoneg)
+               return -EINVAL;
+
+       priv->prof->tx_pause = pause->tx_pause != 0;
+       priv->prof->rx_pause = pause->rx_pause != 0;
+       err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+                                   priv->rx_skb_size + ETH_FCS_LEN,
+                                   priv->prof->tx_pause,
+                                   priv->prof->tx_ppp,
+                                   priv->prof->rx_pause,
+                                   priv->prof->rx_ppp);
+       if (err)
+               en_err(priv, "Failed setting pause params\n");
+       else
+               mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap,
+                                               priv->prof->rx_ppp,
+                                               priv->prof->rx_pause,
+                                               priv->prof->tx_ppp,
+                                               priv->prof->tx_pause);
+
+       return err;
+}
+
+static void mlx4_en_get_pauseparam(struct net_device *dev,
+                                struct ethtool_pauseparam *pause)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       pause->tx_pause = priv->prof->tx_pause;
+       pause->rx_pause = priv->prof->rx_pause;
+}
+
+static int mlx4_en_set_ringparam(struct net_device *dev,
+                                struct ethtool_ringparam *param)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       u32 rx_size, tx_size;
+       int port_up = 0;
+       int err = 0;
+
+       if (param->rx_jumbo_pending || param->rx_mini_pending)
+               return -EINVAL;
+
+       rx_size = roundup_pow_of_two(param->rx_pending);
+       rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE);
+       rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE);
+       tx_size = roundup_pow_of_two(param->tx_pending);
+       tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE);
+       tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE);
+
+       if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size :
+                                       priv->rx_ring[0]->size) &&
+           tx_size == priv->tx_ring[0]->size)
+               return 0;
+
+       mutex_lock(&mdev->state_lock);
+       if (priv->port_up) {
+               port_up = 1;
+               mlx4_en_stop_port(dev, 1);
+       }
+
+       mlx4_en_free_resources(priv);
+
+       priv->prof->tx_ring_size = tx_size;
+       priv->prof->rx_ring_size = rx_size;
+
+       err = mlx4_en_alloc_resources(priv);
+       if (err) {
+               en_err(priv, "Failed reallocating port resources\n");
+               goto out;
+       }
+       if (port_up) {
+               err = mlx4_en_start_port(dev);
+               if (err)
+                       en_err(priv, "Failed starting port\n");
+       }
+
+       err = mlx4_en_moderation_update(priv);
+
+out:
+       mutex_unlock(&mdev->state_lock);
+       return err;
+}
+
+static void mlx4_en_get_ringparam(struct net_device *dev,
+                                 struct ethtool_ringparam *param)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       memset(param, 0, sizeof(*param));
+       param->rx_max_pending = MLX4_EN_MAX_RX_SIZE;
+       param->tx_max_pending = MLX4_EN_MAX_TX_SIZE;
+       param->rx_pending = priv->port_up ?
+               priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size;
+       param->tx_pending = priv->tx_ring[0]->size;
+}
+
+static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       return priv->rx_ring_num;
+}
+
+static u32 mlx4_en_get_rxfh_key_size(struct net_device *netdev)
+{
+       return MLX4_EN_RSS_KEY_SIZE;
+}
+
+static int mlx4_en_check_rxfh_func(struct net_device *dev, u8 hfunc)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       /* check if requested function is supported by the device */
+       if (hfunc == ETH_RSS_HASH_TOP) {
+               if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP))
+                       return -EINVAL;
+               if (!(dev->features & NETIF_F_RXHASH))
+                       en_warn(priv, "Toeplitz hash function should be used in conjunction with RX hashing for optimal performance\n");
+               return 0;
+       } else if (hfunc == ETH_RSS_HASH_XOR) {
+               if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR))
+                       return -EINVAL;
+               if (dev->features & NETIF_F_RXHASH)
+                       en_warn(priv, "Enabling both XOR Hash function and RX Hashing can limit RPS functionality\n");
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key,
+                           u8 *hfunc)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_rss_map *rss_map = &priv->rss_map;
+       int rss_rings;
+       size_t n = priv->rx_ring_num;
+       int err = 0;
+
+       rss_rings = priv->prof->rss_rings ?: priv->rx_ring_num;
+       rss_rings = 1 << ilog2(rss_rings);
+
+       while (n--) {
+               if (!ring_index)
+                       break;
+               ring_index[n] = rss_map->qps[n % rss_rings].qpn -
+                       rss_map->base_qpn;
+       }
+       if (key)
+               memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE);
+       if (hfunc)
+               *hfunc = priv->rss_hash_fn;
+       return err;
+}
+
+static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index,
+                           const u8 *key, const u8 hfunc)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int port_up = 0;
+       int err = 0;
+       int i;
+       int rss_rings = 0;
+
+       /* Calculate RSS table size and make sure flows are spread evenly
+        * between rings
+        */
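+       /* e.g. an indirection table of 0,1,2,3,0,1,2,3 over eight RX rings
+        * yields rss_rings = 4
+        */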
+       for (i = 0; i < priv->rx_ring_num; i++) {
+               if (!ring_index)
+                       continue;
+               if (i > 0 && !ring_index[i] && !rss_rings)
+                       rss_rings = i;
+
+               if (ring_index[i] != (i % (rss_rings ?: priv->rx_ring_num)))
+                       return -EINVAL;
+       }
+
+       if (!rss_rings)
+               rss_rings = priv->rx_ring_num;
+
+       /* RSS table size must be a power of 2 */
+       if (!is_power_of_2(rss_rings))
+               return -EINVAL;
+
+       if (hfunc != ETH_RSS_HASH_NO_CHANGE) {
+               err = mlx4_en_check_rxfh_func(dev, hfunc);
+               if (err)
+                       return err;
+       }
+
+       mutex_lock(&mdev->state_lock);
+       if (priv->port_up) {
+               port_up = 1;
+               mlx4_en_stop_port(dev, 1);
+       }
+
+       if (ring_index)
+               priv->prof->rss_rings = rss_rings;
+       if (key)
+               memcpy(priv->rss_key, key, MLX4_EN_RSS_KEY_SIZE);
+       if (hfunc != ETH_RSS_HASH_NO_CHANGE)
+               priv->rss_hash_fn = hfunc;
+
+       if (port_up) {
+               err = mlx4_en_start_port(dev);
+               if (err)
+                       en_err(priv, "Failed starting port\n");
+       }
+
+       mutex_unlock(&mdev->state_lock);
+       return err;
+}
+
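+/* Flow-steering rules match a field either exactly or not at all, so every
+ * mask must be all zeros or all ones.
+ */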
+#define all_zeros_or_all_ones(field)           \
+       ((field) == 0 || (field) == (__force typeof(field))-1)
+
+static int mlx4_en_validate_flow(struct net_device *dev,
+                                struct ethtool_rxnfc *cmd)
+{
+       struct ethtool_usrip4_spec *l3_mask;
+       struct ethtool_tcpip4_spec *l4_mask;
+       struct ethhdr *eth_mask;
+
+       if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
+               return -EINVAL;
+
+       if (cmd->fs.flow_type & FLOW_MAC_EXT) {
+               /* dest mac mask must be ff:ff:ff:ff:ff:ff */
+               if (!is_broadcast_ether_addr(cmd->fs.m_ext.h_dest))
+                       return -EINVAL;
+       }
+
+       switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+       case TCP_V4_FLOW:
+       case UDP_V4_FLOW:
+               if (cmd->fs.m_u.tcp_ip4_spec.tos)
+                       return -EINVAL;
+               l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
+               /* don't allow mask which isn't all 0 or 1 */
+               if (!all_zeros_or_all_ones(l4_mask->ip4src) ||
+                   !all_zeros_or_all_ones(l4_mask->ip4dst) ||
+                   !all_zeros_or_all_ones(l4_mask->psrc) ||
+                   !all_zeros_or_all_ones(l4_mask->pdst))
+                       return -EINVAL;
+               break;
+       case IP_USER_FLOW:
+               l3_mask = &cmd->fs.m_u.usr_ip4_spec;
+               if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto ||
+                   cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 ||
+                   (!l3_mask->ip4src && !l3_mask->ip4dst) ||
+                   !all_zeros_or_all_ones(l3_mask->ip4src) ||
+                   !all_zeros_or_all_ones(l3_mask->ip4dst))
+                       return -EINVAL;
+               break;
+       case ETHER_FLOW:
+               eth_mask = &cmd->fs.m_u.ether_spec;
+               /* source mac mask must not be set */
+               if (!is_zero_ether_addr(eth_mask->h_source))
+                       return -EINVAL;
+
+               /* dest mac mask must be ff:ff:ff:ff:ff:ff */
+               if (!is_broadcast_ether_addr(eth_mask->h_dest))
+                       return -EINVAL;
+
+               if (!all_zeros_or_all_ones(eth_mask->h_proto))
+                       return -EINVAL;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if ((cmd->fs.flow_type & FLOW_EXT)) {
+               if (cmd->fs.m_ext.vlan_etype ||
+                   !((cmd->fs.m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK)) ==
+                     0 ||
+                     (cmd->fs.m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK)) ==
+                     cpu_to_be16(VLAN_VID_MASK)))
+                       return -EINVAL;
+
+               if (cmd->fs.m_ext.vlan_tci) {
+                       if (be16_to_cpu(cmd->fs.h_ext.vlan_tci) >= VLAN_N_VID)
+                               return -EINVAL;
+
+               }
+       }
+
+       return 0;
+}
+
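+/* Append an L2 spec matching the given destination MAC (and the VLAN id,
+ * when one was supplied via FLOW_EXT) to the rule list.
+ */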
+static int mlx4_en_ethtool_add_mac_rule(struct ethtool_rxnfc *cmd,
+                                       struct list_head *rule_list_h,
+                                       struct mlx4_spec_list *spec_l2,
+                                       unsigned char *mac)
+{
+       int err = 0;
+       __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+       spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
+       memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
+       memcpy(spec_l2->eth.dst_mac, mac, ETH_ALEN);
+
+       if ((cmd->fs.flow_type & FLOW_EXT) &&
+           (cmd->fs.m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) {
+               spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci;
+               spec_l2->eth.vlan_id_msk = cpu_to_be16(VLAN_VID_MASK);
+       }
+
+       list_add_tail(&spec_l2->list, rule_list_h);
+
+       return err;
+}
+
+static int mlx4_en_ethtool_add_mac_rule_by_ipv4(struct mlx4_en_priv *priv,
+                                               struct ethtool_rxnfc *cmd,
+                                               struct list_head *rule_list_h,
+                                               struct mlx4_spec_list *spec_l2,
+                                               __be32 ipv4_dst)
+{
+#ifdef CONFIG_INET
+       unsigned char mac[ETH_ALEN];
+
+       if (!ipv4_is_multicast(ipv4_dst)) {
+               if (cmd->fs.flow_type & FLOW_MAC_EXT)
+                       memcpy(&mac, cmd->fs.h_ext.h_dest, ETH_ALEN);
+               else
+                       memcpy(&mac, priv->dev->dev_addr, ETH_ALEN);
+       } else {
+               ip_eth_mc_map(ipv4_dst, mac);
+       }
+
+       return mlx4_en_ethtool_add_mac_rule(cmd, rule_list_h, spec_l2, &mac[0]);
+#else
+       return -EINVAL;
+#endif
+}
+
+static int add_ip_rule(struct mlx4_en_priv *priv,
+                      struct ethtool_rxnfc *cmd,
+                      struct list_head *list_h)
+{
+       int err;
+       struct mlx4_spec_list *spec_l2 = NULL;
+       struct mlx4_spec_list *spec_l3 = NULL;
+       struct ethtool_usrip4_spec *l3_mask = &cmd->fs.m_u.usr_ip4_spec;
+
+       spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL);
+       spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL);
+       if (!spec_l2 || !spec_l3) {
+               err = -ENOMEM;
+               goto free_spec;
+       }
+
+       err = mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, spec_l2,
+                                                  cmd->fs.h_u.
+                                                  usr_ip4_spec.ip4dst);
+       if (err)
+               goto free_spec;
+       spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
+       spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src;
+       if (l3_mask->ip4src)
+               spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
+       spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst;
+       if (l3_mask->ip4dst)
+               spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
+       list_add_tail(&spec_l3->list, list_h);
+
+       return 0;
+
+free_spec:
+       kfree(spec_l2);
+       kfree(spec_l3);
+       return err;
+}
+
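+/* Build L2 + L3 + L4 specs for a TCP/UDP over IPv4 flow; only fields whose
+ * masks were validated as all-ones are matched.
+ */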
+static int add_tcp_udp_rule(struct mlx4_en_priv *priv,
+                            struct ethtool_rxnfc *cmd,
+                            struct list_head *list_h, int proto)
+{
+       int err;
+       struct mlx4_spec_list *spec_l2 = NULL;
+       struct mlx4_spec_list *spec_l3 = NULL;
+       struct mlx4_spec_list *spec_l4 = NULL;
+       struct ethtool_tcpip4_spec *l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
+
+       spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL);
+       spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL);
+       spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL);
+       if (!spec_l2 || !spec_l3 || !spec_l4) {
+               err = -ENOMEM;
+               goto free_spec;
+       }
+
+       spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
+
+       if (proto == TCP_V4_FLOW) {
+               err = mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h,
+                                                          spec_l2,
+                                                          cmd->fs.h_u.
+                                                          tcp_ip4_spec.ip4dst);
+               if (err)
+                       goto free_spec;
+               spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP;
+               spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src;
+               spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst;
+               spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc;
+               spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst;
+       } else {
+               err = mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h,
+                                                          spec_l2,
+                                                          cmd->fs.h_u.
+                                                          udp_ip4_spec.ip4dst);
+               if (err)
+                       goto free_spec;
+               spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP;
+               spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src;
+               spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst;
+               spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc;
+               spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst;
+       }
+
+       if (l4_mask->ip4src)
+               spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
+       if (l4_mask->ip4dst)
+               spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
+
+       if (l4_mask->psrc)
+               spec_l4->tcp_udp.src_port_msk = EN_ETHTOOL_SHORT_MASK;
+       if (l4_mask->pdst)
+               spec_l4->tcp_udp.dst_port_msk = EN_ETHTOOL_SHORT_MASK;
+
+       list_add_tail(&spec_l3->list, list_h);
+       list_add_tail(&spec_l4->list, list_h);
+
+       return 0;
+
+free_spec:
+       kfree(spec_l2);
+       kfree(spec_l3);
+       kfree(spec_l4);
+       return err;
+}
+
+static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev,
+                                            struct ethtool_rxnfc *cmd,
+                                            struct list_head *rule_list_h)
+{
+       int err;
+       struct ethhdr *eth_spec;
+       struct mlx4_spec_list *spec_l2;
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       err = mlx4_en_validate_flow(dev, cmd);
+       if (err)
+               return err;
+
+       switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+       case ETHER_FLOW:
+               spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL);
+               if (!spec_l2)
+                       return -ENOMEM;
+
+               eth_spec = &cmd->fs.h_u.ether_spec;
+               mlx4_en_ethtool_add_mac_rule(cmd, rule_list_h, spec_l2,
+                                            &eth_spec->h_dest[0]);
+               spec_l2->eth.ether_type = eth_spec->h_proto;
+               if (eth_spec->h_proto)
+                       spec_l2->eth.ether_type_enable = 1;
+               break;
+       case IP_USER_FLOW:
+               err = add_ip_rule(priv, cmd, rule_list_h);
+               break;
+       case TCP_V4_FLOW:
+               err = add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW);
+               break;
+       case UDP_V4_FLOW:
+               err = add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW);
+               break;
+       }
+
+       return err;
+}
+
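+/* Insert or replace the flow-steering rule at cmd->fs.location: resolve the
+ * destination QP, build the spec list, detach any rule already in that slot,
+ * then attach the new rule and remember its registration id.
+ */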
+static int mlx4_en_flow_replace(struct net_device *dev,
+                               struct ethtool_rxnfc *cmd)
+{
+       int err;
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct ethtool_flow_id *loc_rule;
+       struct mlx4_spec_list *spec, *tmp_spec;
+       u32 qpn;
+       u64 reg_id;
+
+       struct mlx4_net_trans_rule rule = {
+               .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+               .exclusive = 0,
+               .allow_loopback = 1,
+               .promisc_mode = MLX4_FS_REGULAR,
+       };
+
+       rule.port = priv->port;
+       rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location;
+       INIT_LIST_HEAD(&rule.list);
+
+       /* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */
+       if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC)
+               qpn = priv->drop_qp.qpn;
+       else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) {
+               qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1);
+       } else {
+               if (cmd->fs.ring_cookie >= priv->rx_ring_num) {
+                       en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist\n",
+                               cmd->fs.ring_cookie);
+                       return -EINVAL;
+               }
+               qpn = priv->rss_map.qps[cmd->fs.ring_cookie].qpn;
+               if (!qpn) {
+                       en_warn(priv, "rxnfc: RX ring (%llu) is inactive\n",
+                               cmd->fs.ring_cookie);
+                       return -EINVAL;
+               }
+       }
+       rule.qpn = qpn;
+       err = mlx4_en_ethtool_to_net_trans_rule(dev, cmd, &rule.list);
+       if (err)
+               goto out_free_list;
+
+       loc_rule = &priv->ethtool_rules[cmd->fs.location];
+       if (loc_rule->id) {
+               err = mlx4_flow_detach(priv->mdev->dev, loc_rule->id);
+               if (err) {
+                       en_err(priv, "Fail to detach network rule at location %d. registration id = %llx\n",
+                              cmd->fs.location, loc_rule->id);
+                       goto out_free_list;
+               }
+               loc_rule->id = 0;
+               memset(&loc_rule->flow_spec, 0,
+                      sizeof(struct ethtool_rx_flow_spec));
+               list_del(&loc_rule->list);
+       }
+       err = mlx4_flow_attach(priv->mdev->dev, &rule, &reg_id);
+       if (err) {
+               en_err(priv, "Fail to attach network rule at location %d\n",
+                      cmd->fs.location);
+               goto out_free_list;
+       }
+       loc_rule->id = reg_id;
+       memcpy(&loc_rule->flow_spec, &cmd->fs,
+              sizeof(struct ethtool_rx_flow_spec));
+       list_add_tail(&loc_rule->list, &priv->ethtool_list);
+
+out_free_list:
+       list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) {
+               list_del(&spec->list);
+               kfree(spec);
+       }
+       return err;
+}
+
+static int mlx4_en_flow_detach(struct net_device *dev,
+                              struct ethtool_rxnfc *cmd)
+{
+       int err = 0;
+       struct ethtool_flow_id *rule;
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
+               return -EINVAL;
+
+       rule = &priv->ethtool_rules[cmd->fs.location];
+       if (!rule->id) {
+               err = -ENOENT;
+               goto out;
+       }
+
+       err = mlx4_flow_detach(priv->mdev->dev, rule->id);
+       if (err) {
+               en_err(priv, "Fail to detach network rule at location %d. registration id = 0x%llx\n",
+                      cmd->fs.location, rule->id);
+               goto out;
+       }
+       rule->id = 0;
+       memset(&rule->flow_spec, 0, sizeof(struct ethtool_rx_flow_spec));
+       list_del(&rule->list);
+out:
+       return err;
+}
+
+static int mlx4_en_get_flow(struct net_device *dev, struct ethtool_rxnfc *cmd,
+                           int loc)
+{
+       int err = 0;
+       struct ethtool_flow_id *rule;
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       if (loc < 0 || loc >= MAX_NUM_OF_FS_RULES)
+               return -EINVAL;
+
+       rule = &priv->ethtool_rules[loc];
+       if (rule->id)
+               memcpy(&cmd->fs, &rule->flow_spec,
+                      sizeof(struct ethtool_rx_flow_spec));
+       else
+               err = -ENOENT;
+
+       return err;
+}
+
+static int mlx4_en_get_num_flows(struct mlx4_en_priv *priv)
+{
+       int i, res = 0;
+
+       for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) {
+               if (priv->ethtool_rules[i].id)
+                       res++;
+       }
+       return res;
+}
+
+static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+                            u32 *rule_locs)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int err = 0;
+       int i = 0, priority = 0;
+
+       if ((cmd->cmd == ETHTOOL_GRXCLSRLCNT ||
+            cmd->cmd == ETHTOOL_GRXCLSRULE ||
+            cmd->cmd == ETHTOOL_GRXCLSRLALL) &&
+           (mdev->dev->caps.steering_mode !=
+            MLX4_STEERING_MODE_DEVICE_MANAGED || !priv->port_up))
+               return -EINVAL;
+
+       switch (cmd->cmd) {
+       case ETHTOOL_GRXRINGS:
+               cmd->data = priv->rx_ring_num;
+               break;
+       case ETHTOOL_GRXCLSRLCNT:
+               cmd->rule_cnt = mlx4_en_get_num_flows(priv);
+               break;
+       case ETHTOOL_GRXCLSRULE:
+               err = mlx4_en_get_flow(dev, cmd, cmd->fs.location);
+               break;
+       case ETHTOOL_GRXCLSRLALL:
+               while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) {
+                       err = mlx4_en_get_flow(dev, cmd, i);
+                       if (!err)
+                               rule_locs[priority++] = i;
+                       i++;
+               }
+               err = 0;
+               break;
+       default:
+               err = -EOPNOTSUPP;
+               break;
+       }
+
+       return err;
+}
+
+static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+       int err = 0;
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+
+       if (mdev->dev->caps.steering_mode !=
+           MLX4_STEERING_MODE_DEVICE_MANAGED || !priv->port_up)
+               return -EINVAL;
+
+       switch (cmd->cmd) {
+       case ETHTOOL_SRXCLSRLINS:
+               err = mlx4_en_flow_replace(dev, cmd);
+               break;
+       case ETHTOOL_SRXCLSRLDEL:
+               err = mlx4_en_flow_detach(dev, cmd);
+               break;
+       default:
+               en_warn(priv, "Unsupported ethtool command. (%d)\n", cmd->cmd);
+               return -EINVAL;
+       }
+
+       return err;
+}
+
+static void mlx4_en_get_channels(struct net_device *dev,
+                                struct ethtool_channels *channel)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       memset(channel, 0, sizeof(*channel));
+
+       channel->max_rx = MAX_RX_RINGS;
+       channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP;
+
+       channel->rx_count = priv->rx_ring_num;
+       channel->tx_count = priv->tx_ring_num / MLX4_EN_NUM_UP;
+}
+
+static int mlx4_en_set_channels(struct net_device *dev,
+                               struct ethtool_channels *channel)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int port_up = 0;
+       int err = 0;
+
+       if (channel->other_count || channel->combined_count ||
+           channel->tx_count > MLX4_EN_MAX_TX_RING_P_UP ||
+           channel->rx_count > MAX_RX_RINGS ||
+           !channel->tx_count || !channel->rx_count)
+               return -EINVAL;
+
+       mutex_lock(&mdev->state_lock);
+       if (priv->port_up) {
+               port_up = 1;
+               mlx4_en_stop_port(dev, 1);
+       }
+
+       mlx4_en_free_resources(priv);
+
+       priv->num_tx_rings_p_up = channel->tx_count;
+       priv->tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP;
+       priv->rx_ring_num = channel->rx_count;
+
+       err = mlx4_en_alloc_resources(priv);
+       if (err) {
+               en_err(priv, "Failed reallocating port resources\n");
+               goto out;
+       }
+
+       netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
+       netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
+
+       if (dev->num_tc)
+               mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP);
+
+       en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num);
+       en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num);
+
+       if (port_up) {
+               err = mlx4_en_start_port(dev);
+               if (err)
+                       en_err(priv, "Failed starting port\n");
+       }
+
+       err = mlx4_en_moderation_update(priv);
+
+out:
+       mutex_unlock(&mdev->state_lock);
+       return err;
+}
+
+static int mlx4_en_get_ts_info(struct net_device *dev,
+                              struct ethtool_ts_info *info)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int ret;
+
+       ret = ethtool_op_get_ts_info(dev, info);
+       if (ret)
+               return ret;
+
+       if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
+               info->so_timestamping |=
+                       SOF_TIMESTAMPING_TX_HARDWARE |
+                       SOF_TIMESTAMPING_RX_HARDWARE |
+                       SOF_TIMESTAMPING_RAW_HARDWARE;
+
+               info->tx_types =
+                       (1 << HWTSTAMP_TX_OFF) |
+                       (1 << HWTSTAMP_TX_ON);
+
+               info->rx_filters =
+                       (1 << HWTSTAMP_FILTER_NONE) |
+                       (1 << HWTSTAMP_FILTER_ALL);
+
+               if (mdev->ptp_clock)
+                       info->phc_index = ptp_clock_index(mdev->ptp_clock);
+       }
+
+       return ret;
+}
+
+static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       bool bf_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_BLUEFLAME);
+       bool bf_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_BLUEFLAME);
+       int i;
+
+       if (bf_enabled_new == bf_enabled_old)
+               return 0; /* Nothing to do */
+
+       if (bf_enabled_new) {
+               bool bf_supported = true;
+
+               for (i = 0; i < priv->tx_ring_num; i++)
+                       bf_supported &= priv->tx_ring[i]->bf_alloced;
+
+               if (!bf_supported) {
+                       en_err(priv, "BlueFlame is not supported\n");
+                       return -EINVAL;
+               }
+
+               priv->pflags |= MLX4_EN_PRIV_FLAGS_BLUEFLAME;
+       } else {
+               priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
+       }
+
+       for (i = 0; i < priv->tx_ring_num; i++)
+               priv->tx_ring[i]->bf_enabled = bf_enabled_new;
+
+       en_info(priv, "BlueFlame %s\n",
+               bf_enabled_new ?  "Enabled" : "Disabled");
+
+       return 0;
+}
+
+static u32 mlx4_en_get_priv_flags(struct net_device *dev)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       return priv->pflags;
+}
+
+static int mlx4_en_get_tunable(struct net_device *dev,
+                              const struct ethtool_tunable *tuna,
+                              void *data)
+{
+       const struct mlx4_en_priv *priv = netdev_priv(dev);
+       int ret = 0;
+
+       switch (tuna->id) {
+       case ETHTOOL_TX_COPYBREAK:
+               *(u32 *)data = priv->prof->inline_thold;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+static int mlx4_en_set_tunable(struct net_device *dev,
+                              const struct ethtool_tunable *tuna,
+                              const void *data)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       int val, ret = 0;
+
+       switch (tuna->id) {
+       case ETHTOOL_TX_COPYBREAK:
+               val = *(u32 *)data;
+               if (val < MIN_PKT_LEN || val > MAX_INLINE)
+                       ret = -EINVAL;
+               else
+                       priv->prof->inline_thold = val;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+static int mlx4_en_get_module_info(struct net_device *dev,
+                                  struct ethtool_modinfo *modinfo)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int ret;
+       u8 data[4];
+
+       /* Read first 2 bytes to get Module & REV ID */
+       ret = mlx4_get_module_info(mdev->dev, priv->port,
+                                  0/*offset*/, 2/*size*/, data);
+       if (ret < 2)
+               return -EIO;
+
+       switch (data[0] /* identifier */) {
+       case MLX4_MODULE_ID_QSFP:
+               modinfo->type = ETH_MODULE_SFF_8436;
+               modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+               break;
+       case MLX4_MODULE_ID_QSFP_PLUS:
+               if (data[1] >= 0x3) { /* revision id */
+                       modinfo->type = ETH_MODULE_SFF_8636;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+               } else {
+                       modinfo->type = ETH_MODULE_SFF_8436;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+               }
+               break;
+       case MLX4_MODULE_ID_QSFP28:
+               modinfo->type = ETH_MODULE_SFF_8636;
+               modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+               break;
+       case MLX4_MODULE_ID_SFP:
+               modinfo->type = ETH_MODULE_SFF_8472;
+               modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+               break;
+       default:
+               return -ENOSYS;
+       }
+
+       return 0;
+}
+
+static int mlx4_en_get_module_eeprom(struct net_device *dev,
+                                    struct ethtool_eeprom *ee,
+                                    u8 *data)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int offset = ee->offset;
+       int i = 0, ret;
+
+       if (ee->len == 0)
+               return -EINVAL;
+
+       memset(data, 0, ee->len);
+
+       while (i < ee->len) {
+               en_dbg(DRV, priv,
+                      "mlx4_get_module_info i(%d) offset(%d) len(%d)\n",
+                      i, offset, ee->len - i);
+
+               ret = mlx4_get_module_info(mdev->dev, priv->port,
+                                          offset, ee->len - i, data + i);
+
+               if (!ret) /* Done reading */
+                       return 0;
+
+               if (ret < 0) {
+                       en_err(priv,
+                              "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n",
+                              i, offset, ee->len - i, ret);
+                       /* Return success anyway so that userspace still
+                        * sees whatever bytes were read before the failure.
+                        */
+                       return 0;
+               }
+
+               i += ret;
+               offset += ret;
+       }
+       return 0;
+}
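
The loop above keeps issuing firmware reads until the requested window is
filled, since mlx4_get_module_info() may return fewer bytes than asked for.
User space reaches this path through the ETHTOOL_GMODULEEPROM command; a
hedged sketch of the kind of request `ethtool -m` builds (the 128-byte
buffer is illustrative):

    #include <linux/ethtool.h>

    /* Sketch: the ethtool core copies hdr.len bytes produced by
     * mlx4_en_get_module_eeprom() into .bytes. */
    struct {
            struct ethtool_eeprom hdr;
            __u8 bytes[128];
    } req = {
            .hdr = {
                    .cmd    = ETHTOOL_GMODULEEPROM,
                    .offset = 0,    /* start of the module EEPROM */
                    .len    = 128,  /* must be <= modinfo->eeprom_len */
            },
    };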
+
+static int mlx4_en_set_phys_id(struct net_device *dev,
+                              enum ethtool_phys_id_state state)
+{
+       int err;
+       u16 beacon_duration;
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+
+       if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_BEACON))
+               return -EOPNOTSUPP;
+
+       switch (state) {
+       case ETHTOOL_ID_ACTIVE:
+               beacon_duration = PORT_BEACON_MAX_LIMIT;
+               break;
+       case ETHTOOL_ID_INACTIVE:
+               beacon_duration = 0;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       err = mlx4_SET_PORT_BEACON(mdev->dev, priv->port, beacon_duration);
+       return err;
+}
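
For reference, this callback services `ethtool -p` (ETHTOOL_PHYS_ID); on
beacon-capable ports the firmware blinks the LED itself, which is why the
driver only arms or disarms the beacon rather than toggling it. A hedged
sketch of the legacy ioctl payload behind that command:

    #include <linux/ethtool.h>

    /* Sketch: identify-port request; .data is the blink duration in
     * seconds, 0 meaning "until interrupted". */
    struct ethtool_value id = {
            .cmd  = ETHTOOL_PHYS_ID,
            .data = 5,
    };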
+
+const struct ethtool_ops mlx4_en_ethtool_ops = {
+       .get_drvinfo = mlx4_en_get_drvinfo,
+       .get_settings = mlx4_en_get_settings,
+       .set_settings = mlx4_en_set_settings,
+       .get_link = ethtool_op_get_link,
+       .get_strings = mlx4_en_get_strings,
+       .get_sset_count = mlx4_en_get_sset_count,
+       .get_ethtool_stats = mlx4_en_get_ethtool_stats,
+       .self_test = mlx4_en_self_test,
+       .set_phys_id = mlx4_en_set_phys_id,
+       .get_wol = mlx4_en_get_wol,
+       .set_wol = mlx4_en_set_wol,
+       .get_msglevel = mlx4_en_get_msglevel,
+       .set_msglevel = mlx4_en_set_msglevel,
+       .get_coalesce = mlx4_en_get_coalesce,
+       .set_coalesce = mlx4_en_set_coalesce,
+       .get_pauseparam = mlx4_en_get_pauseparam,
+       .set_pauseparam = mlx4_en_set_pauseparam,
+       .get_ringparam = mlx4_en_get_ringparam,
+       .set_ringparam = mlx4_en_set_ringparam,
+       .get_rxnfc = mlx4_en_get_rxnfc,
+       .set_rxnfc = mlx4_en_set_rxnfc,
+       .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size,
+       .get_rxfh_key_size = mlx4_en_get_rxfh_key_size,
+       .get_rxfh = mlx4_en_get_rxfh,
+       .set_rxfh = mlx4_en_set_rxfh,
+       .get_channels = mlx4_en_get_channels,
+       .set_channels = mlx4_en_set_channels,
+       .get_ts_info = mlx4_en_get_ts_info,
+       .set_priv_flags = mlx4_en_set_priv_flags,
+       .get_priv_flags = mlx4_en_get_priv_flags,
+       .get_tunable = mlx4_en_get_tunable,
+       .set_tunable = mlx4_en_set_tunable,
+       .get_module_info = mlx4_en_get_module_info,
+       .get_module_eeprom = mlx4_en_get_module_eeprom,
+};
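
This table only takes effect once it is attached to the net_device; in the
upstream driver that happens in en_netdev.c, roughly as follows (a
reconstruction for illustration, not part of this patch):

    /* In mlx4_en_init_netdev() or an equivalent setup path: */
    dev->ethtool_ops = &mlx4_en_ethtool_ops;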
diff --git a/kern/drivers/net/mlx4/en_main.c b/kern/drivers/net/mlx4/en_main.c
new file mode 100644 (file)
index 0000000..913b716
--- /dev/null
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_en.h"
+
+MODULE_AUTHOR("Liran Liss, Yevgeny Petrilin");
+MODULE_DESCRIPTION("Mellanox ConnectX HCA Ethernet driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION " (" DRV_RELDATE ")");
+
+static const char mlx4_en_version[] =
+       DRV_NAME ": Mellanox ConnectX HCA Ethernet driver v"
+       DRV_VERSION " (" DRV_RELDATE ")\n";
+
+#define MLX4_EN_PARM_INT(X, def_val, desc) \
+       static unsigned int X = def_val; \
+       module_param(X, uint, 0444); \
+       MODULE_PARM_DESC(X, desc);
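
To make the helper concrete, here is roughly what the preprocessor produces
for the udp_rss parameter declared below (a mechanical expansion, shown for
illustration):

    static unsigned int udp_rss = 1;
    module_param(udp_rss, uint, 0444);  /* read-only via /sys/module/.../parameters */
    MODULE_PARM_DESC(udp_rss,
                     "Enable RSS for incoming UDP traffic (1, default) or disable (0)");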
+
+
+/*
+ * Device scope module parameters
+ */
+
+/* Enable RSS UDP traffic */
+MLX4_EN_PARM_INT(udp_rss, 1,
+                "Enable RSS for incoming UDP traffic (1, default) or disable (0)");
+
+/* Priority pausing */
+MLX4_EN_PARM_INT(pfctx, 0, "Priority-based Flow Control policy on TX[7:0]."
+                          " Per-priority bit mask");
+MLX4_EN_PARM_INT(pfcrx, 0, "Priority-based Flow Control policy on RX[7:0]."
+                          " Per-priority bit mask");
+
+MLX4_EN_PARM_INT(inline_thold, MAX_INLINE,
+                "Threshold for using inline data (range: 17-104, default: 104)");
+
+#define MAX_PFC_TX     0xff
+#define MAX_PFC_RX     0xff
+
+void en_print(const char *level, const struct mlx4_en_priv *priv,
+             const char *format, ...)
+{
+       va_list args;
+       struct va_format vaf;
+
+       va_start(args, format);
+
+       vaf.fmt = format;
+       vaf.va = &args;
+       if (priv->registered)
+               printk("%s%s: %s: %pV",
+                      level, DRV_NAME, priv->dev->name, &vaf);
+       else
+               printk("%s%s: %s: Port %d: %pV",
+                      level, DRV_NAME, dev_name(&priv->mdev->pdev->dev),
+                      priv->port, &vaf);
+       va_end(args);
+}
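
en_print() leans on the kernel's %pV extension, which expands a nested
struct va_format in place so the driver can prepend its own device and port
prefix without an intermediate buffer. The per-level wrappers (en_err(),
en_info(), en_dbg()) live in mlx4_en.h; a hedged reconstruction of the
pattern:

    /* Illustrative only; see mlx4_en.h for the real definitions. */
    #define en_err(priv, format, ...) \
            en_print(KERN_ERR, priv, format, ##__VA_ARGS__)
    #define en_info(priv, format, ...) \
            en_print(KERN_INFO, priv, format, ##__VA_ARGS__)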
+
+void mlx4_en_update_loopback_state(struct net_device *dev,
+                                  netdev_features_t features)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       if (features & NETIF_F_LOOPBACK)
+               priv->ctrl_flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
+       else
+               priv->ctrl_flags &= cpu_to_be32(~MLX4_WQE_CTRL_FORCE_LOOPBACK);
+
+       priv->flags &= ~(MLX4_EN_FLAG_RX_FILTER_NEEDED |
+                        MLX4_EN_FLAG_ENABLE_HW_LOOPBACK);
+
+       /* RX filtering is needed to drop our own looped-back packets when
+        * we are multi-function (SRIOV), the loopback feature is off, and
+        * no loopback selftest is in progress.
+        */
+       if (mlx4_is_mfunc(priv->mdev->dev) &&
+           !(features & NETIF_F_LOOPBACK) && !priv->validate_loopback)
+               priv->flags |= MLX4_EN_FLAG_RX_FILTER_NEEDED;
+
+       /* Set dmac in Tx WQE if we are in SRIOV mode or if loopback selftest
+        * is requested
+        */
+       if (mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback)
+               priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK;
+}
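
Since the two flags are derived from three inputs, here is a summary of the
cases the function above produces, derived by hand from the code ('-' means
don't-care):

    /*
     *  mfunc  NETIF_F_LOOPBACK  selftest  ->  RX_FILTER_NEEDED  HW_LOOPBACK
     *    1          0              0      ->        set             set
     *    1          0              1      ->      clear             set
     *    1          1              -      ->      clear             set
     *    0          -              1      ->      clear             set
     *    0          -              0      ->      clear           clear
     */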
+
+static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
+{
+       struct mlx4_en_profile *params = &mdev->profile;
+       int i;
+
+       params->udp_rss = udp_rss;
+       params->num_tx_rings_p_up = mlx4_low_memory_profile() ?
+               MLX4_EN_MIN_TX_RING_P_UP :
+               min_t(int, num_online_cpus(), MLX4_EN_MAX_TX_RING_P_UP);
+
+       if (params->udp_rss && !(mdev->dev->caps.flags
+                                       & MLX4_DEV_CAP_FLAG_UDP_RSS)) {
+               mlx4_warn(mdev, "UDP RSS is not supported on this device\n");