author    Ido Shamay <idos@mellanox.com>    2014-09-18 11:50:59 +0300
committer David S. Miller <davem@davemloft.net>    2014-09-19 17:30:10 -0400
commit    77507aa249aecd06fa25ad058b64481e46887a01 (patch)
tree      814aa07a0d83e7cfee705f512cd6afb2d303ed78 /drivers/net/ethernet/mellanox/mlx4/fw.c
parent    54003f119c26573d3bb86a5efc64f3e5fd43b8c6 (diff)
net/mlx4_core: Enable CQE/EQE stride support
This feature is intended for archs having a cache line larger than 64B.

Since our CQE/EQEs are generally 64B in those systems, HW will write twice to the same cache line consecutively, causing pipe locks due to the hazard prevention mechanism. For elements in a cyclic buffer, writes are consecutive, so entries smaller than a cache line should be avoided, especially if they are written at a high rate.

Reduce consecutive writes to the same cache line in CQs/EQs by allowing the driver to increase the distance between entries so that each resides in a different cache line.

Until the introduction of this feature, there were two types of CQE/EQE:

1. 32B stride and context in the [0-31] segment
2. 64B stride and context in the [32-63] segment

This feature introduces two additional types:

3. 128B stride and context in the [0-31] segment (128B cache line)
4. 256B stride and context in the [0-31] segment (256B cache line)

Modify the mlx4_core driver to query the device for the CQE/EQE cache line stride capability and to enable that capability when the host cache line size is larger than 64 bytes (supported cache lines are 128B and 256B). The mlx4 IB driver and libmlx4 need not be aware of this change. The PF context behaviour is changed to require this change in VF drivers running on such archs.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
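For orientation before the diff: the stride encoding programmed into INIT_HCA below is log-based. Each size is stored as ilog2(size) - 5, so 32B maps to 0, 64B to 1, 128B to 2 and 256B to 3, with the EQE value in the high nibble and the CQE value in the low nibble of a single byte (both nibbles come out equal in this patch, since both sizes are set to cache_line_size()). A minimal standalone sketch of that encoding; ilog2_u32 and mlx4_encode_stride are illustrative names, not kernel symbols:

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-in for the kernel's ilog2(), for power-of-two inputs. */
static unsigned int ilog2_u32(uint32_t v)
{
	unsigned int r = 0;
	while (v >>= 1)
		r++;
	return r;
}

/* Pack EQE/CQE stride sizes into one byte: EQE log-size in bits [7:4],
 * CQE log-size in bits [3:0], each stored as ilog2(size) - 5
 * (32B -> 0, 64B -> 1, 128B -> 2, 256B -> 3). */
static uint8_t mlx4_encode_stride(unsigned int eqe_size, unsigned int cqe_size)
{
	return (uint8_t)(((ilog2_u32(eqe_size) - 5) << 4) |
			 (ilog2_u32(cqe_size) - 5));
}

int main(void)
{
	unsigned int sizes[] = { 32, 64, 128, 256 };
	for (size_t i = 0; i < 4; i++)
		printf("%3uB stride -> byte 0x%02x\n",
		       sizes[i], mlx4_encode_stride(sizes[i], sizes[i]));
	return 0;
}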
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/fw.c')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw.c | 38
1 file changed, 36 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 494753e44ae3..13b2e4a51ef4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -137,7 +137,9 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[8] = "Dynamic QP updates support",
[9] = "Device managed flow steering IPoIB support",
[10] = "TCP/IP offloads/flow-steering for VXLAN support",
- [11] = "MAD DEMUX (Secure-Host) support"
+ [11] = "MAD DEMUX (Secure-Host) support",
+ [12] = "Large cache line (>64B) CQE stride support",
+ [13] = "Large cache line (>64B) EQE stride support"
};
int i;
@@ -557,6 +559,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74
#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76
#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77
+#define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a
#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80
#define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82
#define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84
@@ -733,6 +736,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->max_rq_sg = field;
MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET);
dev_cap->max_rq_desc_sz = size;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+ if (field & (1 << 6))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+ if (field & (1 << 7))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
MLX4_GET(dev_cap->bmme_flags, outbox,
QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
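As the hunk above shows, the byte read at QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE (offset 0x7a) advertises the two stride capabilities in bits 6 and 7. A hedged sketch of that decode; the macro and helper names here are hypothetical, only the bit positions come from the hunk:

#include <stdbool.h>
#include <stdint.h>

#define CACHE_LINE_STRIDE_CQE_BIT (1 << 6) /* bit 6: CQE stride capability */
#define CACHE_LINE_STRIDE_EQE_BIT (1 << 7) /* bit 7: EQE stride capability */

/* Hypothetical helper: true only when both stride capabilities are present,
 * mirroring the "both flags set" test mlx4_INIT_HCA performs later. */
static bool has_large_stride_caps(uint8_t field)
{
	return (field & CACHE_LINE_STRIDE_CQE_BIT) &&
	       (field & CACHE_LINE_STRIDE_EQE_BIT);
}

int main(void)
{
	uint8_t field = CACHE_LINE_STRIDE_CQE_BIT | CACHE_LINE_STRIDE_EQE_BIT;
	return has_large_stride_caps(field) ? 0 : 1;
}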
@@ -1376,6 +1384,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
#define INIT_HCA_EQE_CQE_OFFSETS (INIT_HCA_QPC_OFFSET + 0x38)
+#define INIT_HCA_EQE_CQE_STRIDE_OFFSET (INIT_HCA_QPC_OFFSET + 0x3b)
#define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
#define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
@@ -1452,11 +1461,25 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
dev->caps.cqe_size = 64;
- dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
} else {
dev->caps.cqe_size = 32;
}
+ /* CX3 is capable of extending CQEs/EQEs to strides larger than 64B */
+ if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) &&
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE)) {
+ dev->caps.eqe_size = cache_line_size();
+ dev->caps.cqe_size = cache_line_size();
+ dev->caps.eqe_factor = 0;
+ MLX4_PUT(inbox, (u8)((ilog2(dev->caps.eqe_size) - 5) << 4 |
+ (ilog2(dev->caps.eqe_size) - 5)),
+ INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+
+ /* Userspace still needs to know to support CQE > 32B */
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+ }
+
/* QPC/EEC/CQC/EQC/RDMARC attributes */
MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
@@ -1616,6 +1639,17 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
if (byte_field & 0x40) /* 64-bytes cqe enabled */
param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+ /* CX3 is capable of extending CQEs/EQEs to strides larger than 64B */
+ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+ if (byte_field) {
+ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+ param->cqe_size = 1 << ((byte_field &
+ MLX4_CQE_SIZE_MASK_STRIDE) + 5);
+ param->eqe_size = 1 << (((byte_field &
+ MLX4_EQE_SIZE_MASK_STRIDE) >> 4) + 5);
+ }
+
/* TPT attributes */
MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET);
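The mlx4_QUERY_HCA hunk above inverts the encoding: mask off a nibble, shift the EQE half down, and add 5 back to the exponent. A hedged round-trip sketch under the assumption that the CQE log-size sits in the low nibble and the EQE log-size in the high nibble (the kernel defines its own MLX4_CQE_SIZE_MASK_STRIDE/MLX4_EQE_SIZE_MASK_STRIDE; the 0x0f/0xf0 values below are illustrative):

#include <assert.h>
#include <stdint.h>

/* Assumed illustrative masks: CQE log-size in the low nibble,
 * EQE log-size in the high nibble of the INIT_HCA stride byte. */
#define CQE_SIZE_MASK_STRIDE 0x0f
#define EQE_SIZE_MASK_STRIDE 0xf0

static uint32_t decode_cqe_size(uint8_t byte_field)
{
	return 1u << ((byte_field & CQE_SIZE_MASK_STRIDE) + 5);
}

static uint32_t decode_eqe_size(uint8_t byte_field)
{
	return 1u << (((byte_field & EQE_SIZE_MASK_STRIDE) >> 4) + 5);
}

int main(void)
{
	/* 128B cache line: both nibbles hold ilog2(128) - 5 = 2. */
	uint8_t byte_field = (2 << 4) | 2;
	assert(decode_cqe_size(byte_field) == 128);
	assert(decode_eqe_size(byte_field) == 128);
	return 0;
}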