From 426dd00d76ad7c96250f5b5f871847d545fef33c Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Thu, 23 Aug 2012 14:09:04 +0000 Subject: mlx4_core: Fix wrong offset in parsing query device caps response The wrong offset was used when parsing the number of XRCs in mlx4_QUERY_DEV_CAP(). Signed-off-by: Dotan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c69648487321..7cf8ec5f3827 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -559,7 +559,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_pds = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET); dev_cap->reserved_xrcds = field >> 4; - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET); dev_cap->max_xrcds = 1 << (field & 0x1f); MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET); -- cgit v1.2.3 From a084feebd24764fab9657a586ea65484ddbd79cf Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 23 Aug 2012 14:09:05 +0000 Subject: mlx4_core: Remove annoying debug message in the resource tracker This innocent print makes it very hard to actually use the mlx4_core debug messages -- for example, the module load sequence of a device with two VFs yielded 3200 debug prints, with 2800 of them being this one. Let's just remove it. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 293c9e820c49..362b25e1f2e7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -360,8 +360,6 @@ static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, r->from_state = r->state; r->state = RES_ANY_BUSY; - mlx4_dbg(dev, "res %s id 0x%llx to busy\n", - ResourceType(type), r->res_id); if (res) *((struct res_common **)res) = r; -- cgit v1.2.3 From e2c76824ca16a3e8443cc7b26abcb21af7c27b10 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:41 +0000 Subject: mlx4_core: Add proxy and tunnel QPs to the reserved QP area In addition, pass the proxy and tunnel QP numbers to slaves so the driver can perform special QP paravirtualization. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/fw.c | 14 +++++++++++ drivers/net/ethernet/mellanox/mlx4/fw.h | 3 +++ drivers/net/ethernet/mellanox/mlx4/main.c | 5 ++++ drivers/net/ethernet/mellanox/mlx4/qp.c | 29 +++++++++++++++++++--- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 3 ++- include/linux/mlx4/device.h | 13 +++++++++- 6 files changed, 62 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c69648487321..35be90178bef 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -184,6 +184,8 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0X30 +#define QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET 0x44 +#define QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET 0x48 #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 @@ -247,6 +249,12 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.num_mgms + dev->caps.num_amgms; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + size = dev->caps.base_tunnel_sqpn + 8 * slave; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET); + + size = dev->caps.sqp_start + 8 * slave; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET); + } else err = -EINVAL; @@ -312,6 +320,12 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap) MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); func_cap->mcg_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET); + func_cap->base_tunnel_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET); + func_cap->base_proxy_qpn = size & 0xFFFFFF; + for (i = 1; i <= func_cap->num_ports; ++i) { err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 1, MLX4_CMD_QUERY_FUNC_CAP, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 83fcbbf1b169..ced1de57c818 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -134,6 +134,9 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; + u32 base_qpn; + u32 base_tunnel_qpn; + u32 base_proxy_qpn; u8 physical_port[MLX4_MAX_PORTS + 1]; u8 port_flags[MLX4_MAX_PORTS + 1]; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2f816c6aed72..06ef3afbc49a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -384,6 +384,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; + dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; return 0; } /*The function checks if there are live vf, return the num of them*/ @@ -541,6 +542,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } + /* Calculate our sqp_start */ + dev->caps.sqp_start = func_cap.base_proxy_qpn; + dev->caps.base_tunnel_sqpn = func_cap.base_tunnel_qpn; + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index fb2b36759cbf..b8da72b29cf7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -406,7 +406,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * We also reserve the MSB of the 24-bit QP number to indicate * that a QP is an XRC QP. */ - dev->caps.sqp_start = + dev->caps.base_sqpn = ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); { @@ -437,13 +437,36 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) } + /* Reserve 8 real SQPs in both native and SRIOV modes. + * In addition, in SRIOV mode, reserve 8 proxy SQPs per function + * (for all PFs and VFs), and 8 corresponding tunnel QPs. + * Each proxy SQP works opposite its own tunnel QP. + * + * The QPs are arranged as follows: + * a. 8 real SQPs + * b. All the proxy SQPs (8 per function) + * c. All the tunnel QPs (8 per function) + */ + err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 23) - 1, dev->caps.sqp_start + 8, + (1 << 23) - 1, dev->caps.base_sqpn + 8 + + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev), reserved_from_top); + + /* In mfunc, sqp_start is the base of the proxy SQPs, since the PF also + * uses paravirtualized SQPs. + * In native mode, sqp_start is the base of the real SQPs. */ + if (mlx4_is_mfunc(dev)) { + dev->caps.sqp_start = dev->caps.base_sqpn + + 8 * (mlx4_master_func_num(dev) + 1); + dev->caps.base_tunnel_sqpn = dev->caps.sqp_start + 8 * MLX4_MFUNC_MAX; + } else + dev->caps.sqp_start = dev->caps.base_sqpn; + if (err) return err; - return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start); + return mlx4_CONF_SPECIAL_QP(dev, dev->caps.base_sqpn); } void mlx4_cleanup_qp_table(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 293c9e820c49..3c57a83e6287 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1105,7 +1105,8 @@ static void res_end_move(struct mlx4_dev *dev, int slave, static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn) { - return mlx4_is_qp_reserved(dev, qpn); + return mlx4_is_qp_reserved(dev, qpn) && + (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn)); } static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 07aa8232e631..d5c82b7216de 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -693,7 +693,18 @@ static inline int mlx4_is_master(struct mlx4_dev *dev) static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { - return (qpn < dev->caps.sqp_start + 8); + return (qpn < dev->caps.base_sqpn + 8 + + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); +} + +static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) +{ + int base = dev->caps.sqp_start + slave * 8; + + if (qpn >= base && qpn < base + 8) + return 1; + + return 0; } static inline int mlx4_is_mfunc(struct mlx4_dev *dev) -- cgit v1.2.3 From fc06573dfaf8a33bc0533bb70c49de13fa5232a4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:42 +0000 Subject: IB/mlx4: Initialize SR-IOV IB support for slaves in master context Allocate SR-IOV paravirtualization resources and MAD demuxing contexts on the master. This has two parts. The first part is to initialize the structures to contain the contexts. This is done at master startup time in mlx4_ib_init_sriov(). The second part is to actually create the tunneling resources required on the master to support a slave. This is performed the master detects that a slave has started up (MLX4_DEV_EVENT_SLAVE_INIT event generated when a slave initializes its comm channel). For the master, there is no such startup event, so it creates its own tunneling resources when it starts up. In addition, the master also creates the real special QPs. The ib_core layer on the master causes creation of proxy special QPs, since the master is also paravirtualized at the ib_core layer. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 684 +++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx4/main.c | 80 +++- drivers/infiniband/hw/mlx4/mlx4_ib.h | 34 ++ drivers/net/ethernet/mellanox/mlx4/cmd.c | 3 + include/linux/mlx4/device.h | 3 +- include/linux/mlx4/driver.h | 2 + 6 files changed, 798 insertions(+), 8 deletions(-) (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9c2ae7efd00f..e98849338a94 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -44,6 +44,35 @@ enum { MLX4_IB_VENDOR_CLASS2 = 0xa }; +#define MLX4_TUN_SEND_WRID_SHIFT 34 +#define MLX4_TUN_QPN_SHIFT 32 +#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT) +#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT) + +#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1) +#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3) + +struct mlx4_mad_rcv_buf { + struct ib_grh grh; + u8 payload[256]; +} __packed; + +struct mlx4_mad_snd_buf { + u8 payload[256]; +} __packed; + +struct mlx4_tunnel_mad { + struct ib_grh grh; + struct mlx4_ib_tunnel_header hdr; + struct ib_mad mad; +} __packed; + +struct mlx4_rcv_tunnel_mad { + struct mlx4_rcv_tunnel_hdr hdr; + struct ib_grh grh; + struct ib_mad mad; +} __packed; + int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad) @@ -516,3 +545,658 @@ void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, ib_dispatch_event(&event); } + +static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg) +{ + unsigned long flags; + struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context; + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE) + queue_work(ctx->wq, &ctx->work); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, + struct mlx4_ib_demux_pv_qp *tun_qp, + int index) +{ + struct ib_sge sg_list; + struct ib_recv_wr recv_wr, *bad_recv_wr; + int size; + + size = (tun_qp->qp->qp_type == IB_QPT_UD) ? + sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf); + + sg_list.addr = tun_qp->ring[index].map; + sg_list.length = size; + sg_list.lkey = ctx->mr->lkey; + + recv_wr.next = NULL; + recv_wr.sg_list = &sg_list; + recv_wr.num_sge = 1; + recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV | + MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt); + ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map, + size, DMA_FROM_DEVICE); + return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr); +} + +static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, + enum ib_qp_type qp_type, int is_tun) +{ + int i; + struct mlx4_ib_demux_pv_qp *tun_qp; + int rx_buf_size, tx_buf_size; + + if (qp_type > IB_QPT_GSI) + return -EINVAL; + + tun_qp = &ctx->qp[qp_type]; + + tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS, + GFP_KERNEL); + if (!tun_qp->ring) + return -ENOMEM; + + tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, + sizeof (struct mlx4_ib_tun_tx_buf), + GFP_KERNEL); + if (!tun_qp->tx_ring) { + kfree(tun_qp->ring); + tun_qp->ring = NULL; + return -ENOMEM; + } + + if (is_tun) { + rx_buf_size = sizeof (struct mlx4_tunnel_mad); + tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad); + } else { + rx_buf_size = sizeof (struct mlx4_mad_rcv_buf); + tx_buf_size = sizeof (struct mlx4_mad_snd_buf); + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL); + if (!tun_qp->ring[i].addr) + goto err; + tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev, + tun_qp->ring[i].addr, + rx_buf_size, + DMA_FROM_DEVICE); + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + tun_qp->tx_ring[i].buf.addr = + kmalloc(tx_buf_size, GFP_KERNEL); + if (!tun_qp->tx_ring[i].buf.addr) + goto tx_err; + tun_qp->tx_ring[i].buf.map = + ib_dma_map_single(ctx->ib_dev, + tun_qp->tx_ring[i].buf.addr, + tx_buf_size, + DMA_TO_DEVICE); + tun_qp->tx_ring[i].ah = NULL; + } + spin_lock_init(&tun_qp->tx_lock); + tun_qp->tx_ix_head = 0; + tun_qp->tx_ix_tail = 0; + tun_qp->proxy_qpt = qp_type; + + return 0; + +tx_err: + while (i > 0) { + --i; + ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, + tx_buf_size, DMA_TO_DEVICE); + kfree(tun_qp->tx_ring[i].buf.addr); + } + kfree(tun_qp->tx_ring); + tun_qp->tx_ring = NULL; + i = MLX4_NUM_TUNNEL_BUFS; +err: + while (i > 0) { + --i; + ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, + rx_buf_size, DMA_FROM_DEVICE); + kfree(tun_qp->ring[i].addr); + } + kfree(tun_qp->ring); + tun_qp->ring = NULL; + return -ENOMEM; +} + +static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, + enum ib_qp_type qp_type, int is_tun) +{ + int i; + struct mlx4_ib_demux_pv_qp *tun_qp; + int rx_buf_size, tx_buf_size; + + if (qp_type > IB_QPT_GSI) + return; + + tun_qp = &ctx->qp[qp_type]; + if (is_tun) { + rx_buf_size = sizeof (struct mlx4_tunnel_mad); + tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad); + } else { + rx_buf_size = sizeof (struct mlx4_mad_rcv_buf); + tx_buf_size = sizeof (struct mlx4_mad_snd_buf); + } + + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, + rx_buf_size, DMA_FROM_DEVICE); + kfree(tun_qp->ring[i].addr); + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, + tx_buf_size, DMA_TO_DEVICE); + kfree(tun_qp->tx_ring[i].buf.addr); + if (tun_qp->tx_ring[i].ah) + ib_destroy_ah(tun_qp->tx_ring[i].ah); + } + kfree(tun_qp->tx_ring); + kfree(tun_qp->ring); +} + +static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) +{ + /* dummy until next patch in series */ +} + +static void pv_qp_event_handler(struct ib_event *event, void *qp_context) +{ + struct mlx4_ib_demux_pv_ctx *sqp = qp_context; + + /* It's worse than that! He's dead, Jim! */ + pr_err("Fatal error (%d) on a MAD QP on port %d\n", + event->event, sqp->port); +} + +static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, + enum ib_qp_type qp_type, int create_tun) +{ + int i, ret; + struct mlx4_ib_demux_pv_qp *tun_qp; + struct mlx4_ib_qp_tunnel_init_attr qp_init_attr; + struct ib_qp_attr attr; + int qp_attr_mask_INIT; + + if (qp_type > IB_QPT_GSI) + return -EINVAL; + + tun_qp = &ctx->qp[qp_type]; + + memset(&qp_init_attr, 0, sizeof qp_init_attr); + qp_init_attr.init_attr.send_cq = ctx->cq; + qp_init_attr.init_attr.recv_cq = ctx->cq; + qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS; + qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS; + qp_init_attr.init_attr.cap.max_send_sge = 1; + qp_init_attr.init_attr.cap.max_recv_sge = 1; + if (create_tun) { + qp_init_attr.init_attr.qp_type = IB_QPT_UD; + qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP; + qp_init_attr.port = ctx->port; + qp_init_attr.slave = ctx->slave; + qp_init_attr.proxy_qp_type = qp_type; + qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | + IB_QP_QKEY | IB_QP_PORT; + } else { + qp_init_attr.init_attr.qp_type = qp_type; + qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP; + qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY; + } + qp_init_attr.init_attr.port_num = ctx->port; + qp_init_attr.init_attr.qp_context = ctx; + qp_init_attr.init_attr.event_handler = pv_qp_event_handler; + tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr); + if (IS_ERR(tun_qp->qp)) { + ret = PTR_ERR(tun_qp->qp); + tun_qp->qp = NULL; + pr_err("Couldn't create %s QP (%d)\n", + create_tun ? "tunnel" : "special", ret); + return ret; + } + + memset(&attr, 0, sizeof attr); + attr.qp_state = IB_QPS_INIT; + attr.pkey_index = + to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0]; + attr.qkey = IB_QP1_QKEY; + attr.port_num = ctx->port; + ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT); + if (ret) { + pr_err("Couldn't change %s qp state to INIT (%d)\n", + create_tun ? "tunnel" : "special", ret); + goto err_qp; + } + attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE); + if (ret) { + pr_err("Couldn't change %s qp state to RTR (%d)\n", + create_tun ? "tunnel" : "special", ret); + goto err_qp; + } + attr.qp_state = IB_QPS_RTS; + attr.sq_psn = 0; + ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + pr_err("Couldn't change %s qp state to RTS (%d)\n", + create_tun ? "tunnel" : "special", ret); + goto err_qp; + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i); + if (ret) { + pr_err(" mlx4_ib_post_pv_buf error" + " (err = %d, i = %d)\n", ret, i); + goto err_qp; + } + } + return 0; + +err_qp: + ib_destroy_qp(tun_qp->qp); + tun_qp->qp = NULL; + return ret; +} + +/* + * IB MAD completion callback for real SQPs + */ +static void mlx4_ib_sqp_comp_worker(struct work_struct *work) +{ + /* dummy until next patch in series */ +} + +static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port, + struct mlx4_ib_demux_pv_ctx **ret_ctx) +{ + struct mlx4_ib_demux_pv_ctx *ctx; + + *ret_ctx = NULL; + ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL); + if (!ctx) { + pr_err("failed allocating pv resource context " + "for port %d, slave %d\n", port, slave); + return -ENOMEM; + } + + ctx->ib_dev = &dev->ib_dev; + ctx->port = port; + ctx->slave = slave; + *ret_ctx = ctx; + return 0; +} + +static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port) +{ + if (dev->sriov.demux[port - 1].tun[slave]) { + kfree(dev->sriov.demux[port - 1].tun[slave]); + dev->sriov.demux[port - 1].tun[slave] = NULL; + } +} + +static int create_pv_resources(struct ib_device *ibdev, int slave, int port, + int create_tun, struct mlx4_ib_demux_pv_ctx *ctx) +{ + int ret, cq_size; + + ctx->state = DEMUX_PV_STATE_STARTING; + /* have QP0 only on port owner, and only if link layer is IB */ + if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) && + rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND) + ctx->has_smi = 1; + + if (ctx->has_smi) { + ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun); + if (ret) { + pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret); + goto err_out; + } + } + + ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun); + if (ret) { + pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret); + goto err_out_qp0; + } + + cq_size = 2 * MLX4_NUM_TUNNEL_BUFS; + if (ctx->has_smi) + cq_size *= 2; + + ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, + NULL, ctx, cq_size, 0); + if (IS_ERR(ctx->cq)) { + ret = PTR_ERR(ctx->cq); + pr_err("Couldn't create tunnel CQ (%d)\n", ret); + goto err_buf; + } + + ctx->pd = ib_alloc_pd(ctx->ib_dev); + if (IS_ERR(ctx->pd)) { + ret = PTR_ERR(ctx->pd); + pr_err("Couldn't create tunnel PD (%d)\n", ret); + goto err_cq; + } + + ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(ctx->mr)) { + ret = PTR_ERR(ctx->mr); + pr_err("Couldn't get tunnel DMA MR (%d)\n", ret); + goto err_pd; + } + + if (ctx->has_smi) { + ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun); + if (ret) { + pr_err("Couldn't create %s QP0 (%d)\n", + create_tun ? "tunnel for" : "", ret); + goto err_mr; + } + } + + ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun); + if (ret) { + pr_err("Couldn't create %s QP1 (%d)\n", + create_tun ? "tunnel for" : "", ret); + goto err_qp0; + } + + if (create_tun) + INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker); + else + INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker); + + ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq; + + ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); + if (ret) { + pr_err("Couldn't arm tunnel cq (%d)\n", ret); + goto err_wq; + } + ctx->state = DEMUX_PV_STATE_ACTIVE; + return 0; + +err_wq: + ctx->wq = NULL; + ib_destroy_qp(ctx->qp[1].qp); + ctx->qp[1].qp = NULL; + + +err_qp0: + if (ctx->has_smi) + ib_destroy_qp(ctx->qp[0].qp); + ctx->qp[0].qp = NULL; + +err_mr: + ib_dereg_mr(ctx->mr); + ctx->mr = NULL; + +err_pd: + ib_dealloc_pd(ctx->pd); + ctx->pd = NULL; + +err_cq: + ib_destroy_cq(ctx->cq); + ctx->cq = NULL; + +err_buf: + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun); + +err_out_qp0: + if (ctx->has_smi) + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun); +err_out: + ctx->state = DEMUX_PV_STATE_DOWN; + return ret; +} + +static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port, + struct mlx4_ib_demux_pv_ctx *ctx, int flush) +{ + if (!ctx) + return; + if (ctx->state > DEMUX_PV_STATE_DOWN) { + ctx->state = DEMUX_PV_STATE_DOWNING; + if (flush) + flush_workqueue(ctx->wq); + if (ctx->has_smi) { + ib_destroy_qp(ctx->qp[0].qp); + ctx->qp[0].qp = NULL; + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1); + } + ib_destroy_qp(ctx->qp[1].qp); + ctx->qp[1].qp = NULL; + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1); + ib_dereg_mr(ctx->mr); + ctx->mr = NULL; + ib_dealloc_pd(ctx->pd); + ctx->pd = NULL; + ib_destroy_cq(ctx->cq); + ctx->cq = NULL; + ctx->state = DEMUX_PV_STATE_DOWN; + } +} + +static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave, + int port, int do_init) +{ + int ret = 0; + + if (!do_init) { + /* for master, destroy real sqp resources */ + if (slave == mlx4_master_func_num(dev->dev)) + destroy_pv_resources(dev, slave, port, + dev->sriov.sqps[port - 1], 1); + /* destroy the tunnel qp resources */ + destroy_pv_resources(dev, slave, port, + dev->sriov.demux[port - 1].tun[slave], 1); + return 0; + } + + /* create the tunnel qp resources */ + ret = create_pv_resources(&dev->ib_dev, slave, port, 1, + dev->sriov.demux[port - 1].tun[slave]); + + /* for master, create the real sqp resources */ + if (!ret && slave == mlx4_master_func_num(dev->dev)) + ret = create_pv_resources(&dev->ib_dev, slave, port, 0, + dev->sriov.sqps[port - 1]); + return ret; +} + +void mlx4_ib_tunnels_update_work(struct work_struct *work) +{ + struct mlx4_ib_demux_work *dmxw; + + dmxw = container_of(work, struct mlx4_ib_demux_work, work); + mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port, + dmxw->do_init); + kfree(dmxw); + return; +} + +static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, + struct mlx4_ib_demux_ctx *ctx, + int port) +{ + char name[12]; + int ret = 0; + int i; + + ctx->tun = kcalloc(dev->dev->caps.sqp_demux, + sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL); + if (!ctx->tun) + return -ENOMEM; + + ctx->dev = dev; + ctx->port = port; + ctx->ib_dev = &dev->ib_dev; + + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + ret = alloc_pv_object(dev, i, port, &ctx->tun[i]); + if (ret) { + ret = -ENOMEM; + goto err_wq; + } + } + + snprintf(name, sizeof name, "mlx4_ibt%d", port); + ctx->wq = create_singlethread_workqueue(name); + if (!ctx->wq) { + pr_err("Failed to create tunnelling WQ for port %d\n", port); + ret = -ENOMEM; + goto err_wq; + } + + snprintf(name, sizeof name, "mlx4_ibud%d", port); + ctx->ud_wq = create_singlethread_workqueue(name); + if (!ctx->ud_wq) { + pr_err("Failed to create up/down WQ for port %d\n", port); + ret = -ENOMEM; + goto err_udwq; + } + + return 0; + +err_udwq: + destroy_workqueue(ctx->wq); + ctx->wq = NULL; + +err_wq: + for (i = 0; i < dev->dev->caps.sqp_demux; i++) + free_pv_object(dev, i, port); + kfree(ctx->tun); + ctx->tun = NULL; + return ret; +} + +static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx) +{ + if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) { + sqp_ctx->state = DEMUX_PV_STATE_DOWNING; + flush_workqueue(sqp_ctx->wq); + if (sqp_ctx->has_smi) { + ib_destroy_qp(sqp_ctx->qp[0].qp); + sqp_ctx->qp[0].qp = NULL; + mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0); + } + ib_destroy_qp(sqp_ctx->qp[1].qp); + sqp_ctx->qp[1].qp = NULL; + mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0); + ib_dereg_mr(sqp_ctx->mr); + sqp_ctx->mr = NULL; + ib_dealloc_pd(sqp_ctx->pd); + sqp_ctx->pd = NULL; + ib_destroy_cq(sqp_ctx->cq); + sqp_ctx->cq = NULL; + sqp_ctx->state = DEMUX_PV_STATE_DOWN; + } +} + +static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx) +{ + int i; + if (ctx) { + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + if (!ctx->tun[i]) + continue; + if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN) + ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING; + } + flush_workqueue(ctx->wq); + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0); + free_pv_object(dev, i, ctx->port); + } + kfree(ctx->tun); + destroy_workqueue(ctx->ud_wq); + destroy_workqueue(ctx->wq); + } +} + +static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init) +{ + int i; + + if (!mlx4_is_master(dev->dev)) + return; + /* initialize or tear down tunnel QPs for the master */ + for (i = 0; i < dev->dev->caps.num_ports; i++) + mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init); + return; +} + +int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) +{ + int i = 0; + int err; + + if (!mlx4_is_mfunc(dev->dev)) + return 0; + + dev->sriov.is_going_down = 0; + spin_lock_init(&dev->sriov.going_down_lock); + + mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n"); + + if (mlx4_is_slave(dev->dev)) { + mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n"); + return 0; + } + + mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n", + dev->dev->caps.sqp_demux); + for (i = 0; i < dev->num_ports; i++) { + err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, + &dev->sriov.sqps[i]); + if (err) + goto demux_err; + err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1); + if (err) + goto demux_err; + } + mlx4_ib_master_tunnels(dev, 1); + return 0; + +demux_err: + while (i > 0) { + free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); + mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); + --i; + } + + return err; +} + +void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev) +{ + int i; + unsigned long flags; + + if (!mlx4_is_mfunc(dev->dev)) + return; + + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + dev->sriov.is_going_down = 1; + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); + if (mlx4_is_master(dev->dev)) + for (i = 0; i < dev->num_ports; i++) { + flush_workqueue(dev->sriov.demux[i].ud_wq); + mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]); + kfree(dev->sriov.sqps[i]); + dev->sriov.sqps[i] = NULL; + mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); + } +} diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index cc05579ebce7..3f7f77f93a1c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1357,11 +1357,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_mad_init(ibdev)) goto err_reg; + if (mlx4_ib_init_sriov(ibdev)) + goto err_mad; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { iboe->nb.notifier_call = mlx4_ib_netdev_event; err = register_netdevice_notifier(&iboe->nb); if (err) - goto err_reg; + goto err_sriov; } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { @@ -1379,6 +1382,12 @@ err_notif: pr_warn("failure unregistering notifier\n"); flush_workqueue(wq); +err_sriov: + mlx4_ib_close_sriov(ibdev); + +err_mad: + mlx4_ib_mad_cleanup(ibdev); + err_reg: ib_unregister_device(&ibdev->ib_dev); @@ -1407,6 +1416,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) struct mlx4_ib_dev *ibdev = ibdev_ptr; int p; + mlx4_ib_close_sriov(ibdev); mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); if (ibdev->iboe.nb.notifier_call) { @@ -1428,6 +1438,51 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ib_dealloc_device(&ibdev->ib_dev); } +static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) +{ + struct mlx4_ib_demux_work **dm = NULL; + struct mlx4_dev *dev = ibdev->dev; + int i; + unsigned long flags; + + if (!mlx4_is_master(dev)) + return; + + dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC); + if (!dm) { + pr_err("failed to allocate memory for tunneling qp update\n"); + goto out; + } + + for (i = 0; i < dev->caps.num_ports; i++) { + dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); + if (!dm[i]) { + pr_err("failed to allocate memory for tunneling qp update work struct\n"); + for (i = 0; i < dev->caps.num_ports; i++) { + if (dm[i]) + kfree(dm[i]); + } + goto out; + } + } + /* initialize or tear down tunnel QPs for the slave */ + for (i = 0; i < dev->caps.num_ports; i++) { + INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); + dm[i]->port = i + 1; + dm[i]->slave = slave; + dm[i]->do_init = do_init; + dm[i]->dev = ibdev; + spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); + if (!ibdev->sriov.is_going_down) + queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); + spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + } +out: + if (dm) + kfree(dm); + return; +} + static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { @@ -1435,22 +1490,23 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); struct mlx4_eqe *eqe = NULL; struct ib_event_work *ew; - int port = 0; + int p = 0; if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) eqe = (struct mlx4_eqe *)param; else - port = (u8)param; - - if (port > ibdev->num_ports) - return; + p = (int) param; switch (event) { case MLX4_DEV_EVENT_PORT_UP: + if (p > ibdev->num_ports) + return; ibev.event = IB_EVENT_PORT_ACTIVE; break; case MLX4_DEV_EVENT_PORT_DOWN: + if (p > ibdev->num_ports) + return; ibev.event = IB_EVENT_PORT_ERR; break; @@ -1472,12 +1528,22 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, handle_port_mgmt_change_event(&ew->work); return; + case MLX4_DEV_EVENT_SLAVE_INIT: + /* here, p is the slave id */ + do_slave_init(ibdev, p, 1); + return; + + case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: + /* here, p is the slave id */ + do_slave_init(ibdev, p, 0); + return; + default: return; } ibev.device = ibdev_ptr; - ibev.element.port_num = port; + ibev.element.port_num = (u8) p; ib_dispatch_event(&ibev); } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 1248d576b031..137941d79870 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -176,6 +176,10 @@ enum mlx4_ib_qp_type { MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \ MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI) +enum { + MLX4_NUM_TUNNEL_BUFS = 256, +}; + struct mlx4_ib_tunnel_header { struct mlx4_av av; __be32 remote_qpn; @@ -263,6 +267,15 @@ struct mlx4_ib_ah { union mlx4_ext_av av; }; +struct mlx4_ib_demux_work { + struct work_struct work; + struct mlx4_ib_dev *dev; + int slave; + int do_init; + u8 port; + +}; + struct mlx4_ib_tun_tx_buf { struct mlx4_ib_buf buf; struct ib_ah *ah; @@ -278,9 +291,17 @@ struct mlx4_ib_demux_pv_qp { unsigned tx_ix_tail; }; +enum mlx4_ib_demux_pv_state { + DEMUX_PV_STATE_DOWN, + DEMUX_PV_STATE_STARTING, + DEMUX_PV_STATE_ACTIVE, + DEMUX_PV_STATE_DOWNING, +}; + struct mlx4_ib_demux_pv_ctx { int port; int slave; + enum mlx4_ib_demux_pv_state state; int has_smi; struct ib_device *ib_dev; struct ib_cq *cq; @@ -319,6 +340,13 @@ struct mlx4_ib_iboe { union ib_gid gid_table[MLX4_MAX_PORTS][128]; }; +struct pkey_mgt { + u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + struct list_head pkey_port_list[MLX4_MFUNC_MAX]; + struct kobject *device_parent[MLX4_MFUNC_MAX]; +}; + struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; @@ -340,6 +368,7 @@ struct mlx4_ib_dev { int counters[MLX4_MAX_PORTS]; int *eq_table; int eq_added; + struct pkey_mgt pkeys; }; struct ib_event_work { @@ -424,6 +453,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) return container_of(ibah, struct mlx4_ib_ah, ibah); } +int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); +void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); + int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db); void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); @@ -515,4 +547,6 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, enum ib_event_type type); +void mlx4_ib_tunnels_update_work(struct work_struct *work); + #endif /* MLX4_IB_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index c8fef4353021..cb9bebe28276 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1340,6 +1340,8 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) goto inform_slave_state; + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave); + /* write the version in the event field */ reply |= mlx4_comm_get_version(); @@ -1376,6 +1378,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, goto reset_slave; slave_state[slave].vhcr_dma |= param; slave_state[slave].active = true; + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave); break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d5c82b7216de..b6b8d341b6c8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -54,7 +54,8 @@ enum { }; enum { - MLX4_MAX_PORTS = 2 + MLX4_MAX_PORTS = 2, + MLX4_MAX_PORT_PKEYS = 128 }; /* base qkey for use in sriov tunnel-qp/proxy-qp communication. diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index d813704b963b..c257e1b211be 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -45,6 +45,8 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_PORT_DOWN, MLX4_DEV_EVENT_PORT_REINIT, MLX4_DEV_EVENT_PORT_MGMT_CHANGE, + MLX4_DEV_EVENT_SLAVE_INIT, + MLX4_DEV_EVENT_SLAVE_SHUTDOWN, }; struct mlx4_interface { -- cgit v1.2.3 From 54679e148287f0ca1bdd09264c908bacb9f19b3f Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:43 +0000 Subject: mlx4: Implement QP paravirtualization and maintain phys_pkey_cache for smp_snoop This requires: 1. Replacing the paravirtualized P_Key index (inserted by the guest) with the real P_Key index. 2. For UD QPs, placing the guest's true source GID index in the address path structure mgid field, and setting the ud_force_mgid bit so that the mgid is taken from the QP context and not from the WQE when posting sends. 3. For UC and RC QPs, placing the guest's true source GID index in the address path structure mgid field. 4. For tunnel and proxy QPs, setting the Q_Key value reserved for that proxy/tunnel pair. Since not all the above adjustments occur in all the QP transitions, the QP transitions require separate wrapper functions. Secondly, initialize the P_Key virtualization table to its default values: Master virtualized table is 1-1 with the real P_Key table, guest virtualized table has P_Key index 0 mapped to the real P_Key index 0, and all the other P_Key indices mapped to the reserved (invalid) P_Key at index 127. Finally, add logic in smp_snoop for maintaining the phys_P_Key_cache. and generating events on the master only if a P_Key actually changed. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 33 +++- drivers/infiniband/hw/mlx4/main.c | 35 ++++ drivers/net/ethernet/mellanox/mlx4/cmd.c | 12 +- drivers/net/ethernet/mellanox/mlx4/main.c | 11 ++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 47 +++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 217 ++++++++++++++++++++- include/linux/mlx4/device.h | 3 + 7 files changed, 344 insertions(+), 14 deletions(-) (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index e98849338a94..318d5bcf821b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -185,6 +185,10 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, { struct ib_port_info *pinfo; u16 lid; + __be16 *base; + u32 bn, pkey_change_bitmap; + int i; + struct mlx4_ib_dev *dev = to_mdev(ibdev); if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || @@ -209,8 +213,33 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, break; case IB_SMP_ATTR_PKEY_TABLE: - mlx4_ib_dispatch_event(dev, port_num, - IB_EVENT_PKEY_CHANGE); + if (!mlx4_is_mfunc(dev->dev)) { + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); + break; + } + + bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF; + base = (__be16 *) &(((struct ib_smp *)mad)->data[0]); + pkey_change_bitmap = 0; + for (i = 0; i < 32; i++) { + pr_debug("PKEY[%d] = x%x\n", + i + bn*32, be16_to_cpu(base[i])); + if (be16_to_cpu(base[i]) != + dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) { + pkey_change_bitmap |= (1 << i); + dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] = + be16_to_cpu(base[i]); + } + } + pr_debug("PKEY Change event: port=%d, " + "block=0x%x, change_bitmap=0x%x\n", + port_num, bn, pkey_change_bitmap); + + if (pkey_change_bitmap) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); + break; case IB_SMP_ATTR_GUID_INFO: diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3f7f77f93a1c..8e10ec2af7b6 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1121,6 +1121,38 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event return NOTIFY_DONE; } +static void init_pkeys(struct mlx4_ib_dev *ibdev) +{ + int port; + int slave; + int i; + + if (mlx4_is_master(ibdev->dev)) { + for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { + for (i = 0; + i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; + ++i) { + ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] = + /* master has the identity virt2phys pkey mapping */ + (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i : + ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1; + mlx4_sync_pkey_table(ibdev->dev, slave, port, i, + ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]); + } + } + } + /* initialize pkey cache */ + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { + for (i = 0; + i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; + ++i) + ibdev->pkeys.phys_pkey_cache[port-1][i] = + (i) ? 0 : 0xFFFF; + } + } +} + static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { char name[32]; @@ -1375,6 +1407,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_active = true; + if (mlx4_is_mfunc(ibdev->dev)) + init_pkeys(ibdev); + return ibdev; err_notif: diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index cb9bebe28276..662a3c5da739 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -950,7 +950,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_INIT2INIT_QP_wrapper }, { .opcode = MLX4_CMD_INIT2RTR_QP, @@ -968,7 +968,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_RTR2RTS_QP_wrapper }, { .opcode = MLX4_CMD_RTS2RTS_QP, @@ -977,7 +977,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_RTS2RTS_QP_wrapper }, { .opcode = MLX4_CMD_SQERR2RTS_QP, @@ -986,7 +986,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_SQERR2RTS_QP_wrapper }, { .opcode = MLX4_CMD_2ERR_QP, @@ -1013,7 +1013,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_SQD2SQD_QP_wrapper }, { .opcode = MLX4_CMD_SQD2RTS_QP, @@ -1022,7 +1022,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_SQD2RTS_QP_wrapper }, { .opcode = MLX4_CMD_2RST_QP, diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 06ef3afbc49a..2294b7173180 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -424,6 +424,17 @@ int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) } EXPORT_SYMBOL(mlx4_get_parav_qkey); +void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return; + + priv->virt2phys_pkey[slave][port - 1][i] = val; +} +EXPORT_SYMBOL(mlx4_sync_pkey_table); + int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index dba69d98734a..7d27c3158d0c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -807,6 +807,8 @@ struct mlx4_priv { struct io_mapping *bf_mapping; int reserved_mtts; int fs_hash_mode; + u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + }; static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) @@ -1011,16 +1013,61 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 3c57a83e6287..49e9de725d0a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -242,6 +242,15 @@ static int res_tracker_insert(struct rb_root *root, struct res_common *res) return 0; } +enum qp_transition { + QP_TRANS_INIT2RTR, + QP_TRANS_RTR2RTS, + QP_TRANS_RTS2RTS, + QP_TRANS_SQERR2RTS, + QP_TRANS_SQD2SQD, + QP_TRANS_SQD2RTS +}; + /* For Debug uses */ static const char *ResourceType(enum mlx4_resource rt) { @@ -308,14 +317,41 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev, } } -static void update_ud_gid(struct mlx4_dev *dev, - struct mlx4_qp_context *qp_ctx, u8 slave) +static void update_pkey_index(struct mlx4_dev *dev, int slave, + struct mlx4_cmd_mailbox *inbox) { - u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + u8 sched = *(u8 *)(inbox->buf + 64); + u8 orig_index = *(u8 *)(inbox->buf + 35); + u8 new_index; + struct mlx4_priv *priv = mlx4_priv(dev); + int port; + + port = (sched >> 6 & 1) + 1; + + new_index = priv->virt2phys_pkey[slave][port - 1][orig_index]; + *(u8 *)(inbox->buf + 35) = new_index; + + mlx4_dbg(dev, "port = %d, orig pkey index = %d, " + "new pkey index = %d\n", port, orig_index, new_index); +} + +static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, + u8 slave) +{ + struct mlx4_qp_context *qp_ctx = inbox->buf + 8; + enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf); + u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; if (MLX4_QP_ST_UD == ts) qp_ctx->pri_path.mgid_index = 0x80 | slave; + if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) { + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) + qp_ctx->pri_path.mgid_index = slave & 0x7F; + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) + qp_ctx->alt_path.mgid_index = slave & 0x7F; + } + mlx4_dbg(dev, "slave %d, new gid index: 0x%x ", slave, qp_ctx->pri_path.mgid_index); } @@ -1109,6 +1145,11 @@ static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn) (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn)); } +static int fw_reserved(struct mlx4_dev *dev, int qpn) +{ + return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; +} + static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { @@ -1146,7 +1187,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - if (!valid_reserved(dev, slave, qpn)) { + if (!fw_reserved(dev, qpn)) { err = __mlx4_qp_alloc_icm(dev, qpn); if (err) { res_abort_move(dev, slave, RES_QP, qpn); @@ -1499,7 +1540,7 @@ static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - if (!valid_reserved(dev, slave, qpn)) + if (!fw_reserved(dev, qpn)) __mlx4_qp_free_icm(dev, qpn); res_end_move(dev, slave, RES_QP, qpn); @@ -1939,6 +1980,19 @@ static u32 qp_get_srqn(struct mlx4_qp_context *qpc) return be32_to_cpu(qpc->srqn) & 0x1ffffff; } +static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr, + struct mlx4_qp_context *context) +{ + u32 qpn = vhcr->in_modifier & 0xffffff; + u32 qkey = 0; + + if (mlx4_get_parav_qkey(dev, qpn, &qkey)) + return; + + /* adjust qkey in qp context */ + context->qkey = cpu_to_be32(qkey); +} + int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1991,6 +2045,8 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, goto ex_put_scq; } + adjust_proxy_tun_qkey(dev, vhcr, qpc); + update_pkey_index(dev, slave, inbox); err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put_srq; @@ -2136,6 +2192,48 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, return err; } +static int verify_qp_parameters(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *inbox, + enum qp_transition transition, u8 slave) +{ + u32 qp_type; + struct mlx4_qp_context *qp_ctx; + enum mlx4_qp_optpar optpar; + + qp_ctx = inbox->buf + 8; + qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + optpar = be32_to_cpu(*(__be32 *) inbox->buf); + + switch (qp_type) { + case MLX4_QP_ST_RC: + case MLX4_QP_ST_UC: + switch (transition) { + case QP_TRANS_INIT2RTR: + case QP_TRANS_RTR2RTS: + case QP_TRANS_RTS2RTS: + case QP_TRANS_SQD2SQD: + case QP_TRANS_SQD2RTS: + if (slave != mlx4_master_func_num(dev)) + /* slaves have only gid index 0 */ + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) + if (qp_ctx->pri_path.mgid_index) + return -EINVAL; + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) + if (qp_ctx->alt_path.mgid_index) + return -EINVAL; + break; + default: + break; + } + + break; + default: + break; + } + + return 0; +} + int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -2623,16 +2721,123 @@ out: return err; } +int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_qp_context *context = inbox->buf + 8; + adjust_proxy_tun_qkey(dev, vhcr, context); + update_pkey_index(dev, slave, inbox); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + int err; struct mlx4_qp_context *qpc = inbox->buf + 8; - update_ud_gid(dev, qpc, (u8)slave); + err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, qpc); + + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + + +int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_qp_context *context = inbox->buf + 8; + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); + if (err) + return err; + + adjust_proxy_tun_qkey(dev, vhcr, context); + update_gid(dev, inbox, (u8)slave); + update_pkey_index(dev, slave, inbox); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); + if (err) + return err; + adjust_proxy_tun_qkey(dev, vhcr, context); + update_gid(dev, inbox, (u8)slave); + update_pkey_index(dev, slave, inbox); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b6b8d341b6c8..9803fd5d3dba 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -940,6 +940,9 @@ int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); +void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, + int i, int val); + int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 0a9a01884d447c216eff75f8f274a0a3e82c7cee Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:45 +0000 Subject: mlx4: MAD_IFC paravirtualization The MAD_IFC firmware command fulfills two functions. First, it is used in the QP0/QP1 MAD-handling flow to obtain information from the FW (for answering queries), and for setting variables in the HCA (MAD SET packets). For this, MAD_IFC should provide the FW (physical) view of the data. This is the view that OpenSM needs. We call this the "network view". In the second case, MAD_IFC is used by various verbs to obtain data regarding the local HCA (e.g., ib_query_device()). We call this the "host view". This data needs to be paravirtualized. MAD_IFC therefore needs a wrapper function, and also needs another flag indicating whether it should provide the network view (when it is called by ib_process_mad in special-qp packet handling), or the host view (when it is called while implementing a verb). There are currently 2 flag parameters in mlx4_MAD_IFC already: ignore_bkey and ignore_mkey. These two parameters are replaced by a single "mad_ifc_flags" parameter, with different bits set for each flag. A third flag is added: "network-view/host-view". Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 20 ++-- drivers/infiniband/hw/mlx4/main.c | 64 ++++++++---- drivers/infiniband/hw/mlx4/mlx4_ib.h | 14 ++- drivers/net/ethernet/mellanox/mlx4/cmd.c | 162 +++++++++++++++++++++++++++++++ 4 files changed, 234 insertions(+), 26 deletions(-) (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 8dfbf69f8370..ba2580693f79 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -75,7 +75,7 @@ struct mlx4_rcv_tunnel_mad { struct ib_mad mad; } __packed; -int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, +int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad) { @@ -102,10 +102,13 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, * Key check traps can't be generated unless we have in_wc to * tell us where to send the trap. */ - if (ignore_mkey || !in_wc) + if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc) op_modifier |= 0x1; - if (ignore_bkey || !in_wc) + if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc) op_modifier |= 0x2; + if (mlx4_is_mfunc(dev->dev) && + (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc)) + op_modifier |= 0x8; if (in_wc) { struct { @@ -138,10 +141,10 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, in_modifier |= in_wc->slid << 16; } - err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, - in_modifier, op_modifier, + err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, + mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier, MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, - MLX4_CMD_NATIVE); + (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED); if (!err) memcpy(response_mad, outmailbox->buf, 256); @@ -614,8 +617,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, prev_lid = pattr.lid; err = mlx4_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, + (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) | + (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) | + MLX4_MAD_IFC_NET_VIEW, port_num, in_wc, in_grh, in_mad, out_mad); if (err) return IB_MAD_RESULT_FAILURE; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8e10ec2af7b6..45a6cc04036b 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -98,7 +98,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, + 1, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -182,11 +183,12 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) } static int ib_link_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) + struct ib_port_attr *props, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int ext_active_speed; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -198,7 +200,10 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port, in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, + if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; + + err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -211,7 +216,10 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port, props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); - props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; + if (netw_view) + props->gid_tbl_len = out_mad->data[50]; + else + props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); @@ -244,7 +252,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port, in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, + err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -270,7 +278,7 @@ static u8 state_to_phys_state(enum ib_port_state state) } static int eth_link_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) + struct ib_port_attr *props, int netw_view) { struct mlx4_ib_dev *mdev = to_mdev(ibdev); @@ -320,20 +328,27 @@ out: return err; } -static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) +int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, int netw_view) { int err; memset(props, 0, sizeof *props); err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? - ib_link_query_port(ibdev, port, props) : - eth_link_query_port(ibdev, port, props); + ib_link_query_port(ibdev, port, props, netw_view) : + eth_link_query_port(ibdev, port, props, netw_view); return err; } +static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + /* returns host view */ + return __mlx4_ib_query_port(ibdev, port, props, 0); +} + static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { @@ -350,7 +365,8 @@ static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, port, + NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -360,7 +376,8 @@ static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, port, + NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -391,11 +408,12 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, return iboe_query_gid(ibdev, port, index, gid); } -static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) +int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -407,7 +425,11 @@ static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; in_mad->attr_mod = cpu_to_be32(index / 32); - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; + + err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, + in_mad, out_mad); if (err) goto out; @@ -419,6 +441,11 @@ out: return err; } +static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0); +} + static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { @@ -849,6 +876,7 @@ static int init_node_data(struct mlx4_ib_dev *dev) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -858,8 +886,10 @@ static int init_node_data(struct mlx4_ib_dev *dev) init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; + if (mlx4_is_master(dev->dev)) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; - err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -867,7 +897,7 @@ static int init_node_data(struct mlx4_ib_dev *dev) in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; - err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 137941d79870..ac71d56ffc7e 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -176,6 +176,14 @@ enum mlx4_ib_qp_type { MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \ MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI) +enum mlx4_ib_mad_ifc_flags { + MLX4_MAD_IFC_IGNORE_MKEY = 1, + MLX4_MAD_IFC_IGNORE_BKEY = 2, + MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY | + MLX4_MAD_IFC_IGNORE_BKEY), + MLX4_MAD_IFC_NET_VIEW = 4, +}; + enum { MLX4_NUM_TUNNEL_BUFS = 256, }; @@ -512,7 +520,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); -int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, +int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad); int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -527,6 +535,10 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, u64 iova); int mlx4_ib_unmap_fmr(struct list_head *fmr_list); int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); +int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, int netw_view); +int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey, int netw_view); int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, u8 *mac, int *is_mcast, u8 port); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 662a3c5da739..a13d8a69b3bc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -40,6 +40,7 @@ #include #include +#include #include @@ -627,6 +628,149 @@ static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } +static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox) +{ + struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf); + struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf); + int err; + int i; + + if (index & 0x1f) + return -EINVAL; + + in_mad->attr_mod = cpu_to_be32(index / 32); + + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) + return err; + + for (i = 0; i < 32; ++i) + pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]); + + return err; +} + +static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox) +{ + int i; + int err; + + for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) { + err = query_pkey_block(dev, port, i, table + i, inbox, outbox); + if (err) + return err; + } + + return 0; +} +#define PORT_CAPABILITY_LOCATION_IN_SMP 20 +#define PORT_STATE_OFFSET 32 + +static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf) +{ + /* will be modified when add alias_guid feature */ + return IB_PORT_DOWN; +} + +static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct ib_smp *smp = inbox->buf; + u32 index; + u8 port; + u16 *table; + int err; + int vidx, pidx; + struct mlx4_priv *priv = mlx4_priv(dev); + struct ib_smp *outsmp = outbox->buf; + __be16 *outtab = (__be16 *)(outsmp->data); + __be32 slave_cap_mask; + port = vhcr->in_modifier; + + if (smp->base_version == 1 && + smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && + smp->class_version == 1) { + if (smp->method == IB_MGMT_METHOD_GET) { + if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) { + index = be32_to_cpu(smp->attr_mod); + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL); + if (!table) + return -ENOMEM; + /* need to get the full pkey table because the paravirtualized + * pkeys may be scattered among several pkey blocks. + */ + err = get_full_pkey_table(dev, port, table, inbox, outbox); + if (!err) { + for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) { + pidx = priv->virt2phys_pkey[slave][port - 1][vidx]; + outtab[vidx % 32] = cpu_to_be16(table[pidx]); + } + } + kfree(table); + return err; + } + if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) { + /*get the slave specific caps:*/ + /*do the command */ + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, + vhcr->in_modifier, vhcr->op_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + /* modify the response for slaves */ + if (!err && slave != mlx4_master_func_num(dev)) { + u8 *state = outsmp->data + PORT_STATE_OFFSET; + + *state = (*state & 0xf0) | vf_port_state(dev, port, slave); + slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; + memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4); + } + return err; + } + if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) { + /* compute slave's gid block */ + smp->attr_mod = cpu_to_be32(slave / 8); + /* execute cmd */ + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, + vhcr->in_modifier, vhcr->op_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + if (!err) { + /* if needed, move slave gid to index 0 */ + if (slave % 8) + memcpy(outsmp->data, + outsmp->data + (slave % 8) * 8, 8); + /* delete all other gids */ + memset(outsmp->data + 8, 0, 56); + } + return err; + } + } + } + if (slave != mlx4_master_func_num(dev) && + ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) || + (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && + smp->method == IB_MGMT_METHOD_SET))) { + mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, " + "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n", + slave, smp->method, smp->mgmt_class, + be16_to_cpu(smp->attr_id)); + return -EPERM; + } + /*default:*/ + return mlx4_cmd_box(dev, inbox->dma, outbox->dma, + vhcr->in_modifier, vhcr->op_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); +} + int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1060,6 +1204,24 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, + { + .opcode = MLX4_CMD_CONF_SPECIAL_QP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, /* XXX verify: only demux can do this */ + .wrapper = NULL + }, + { + .opcode = MLX4_CMD_MAD_IFC, + .has_inbox = true, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_MAD_IFC_wrapper + }, { .opcode = MLX4_CMD_QUERY_IF_STAT, .has_inbox = false, -- cgit v1.2.3 From 993c401e207946fa56f69c51e39f015e7108497b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:48 +0000 Subject: mlx4_core: Add IB port-state machine and port mgmt event propagation For an IB port, a slave should not show port active until that slave has a valid alias-guid (provided by the subnet manager). Therefore the port-up event should be passed to a slave only after both the port is up, and the slave's alias-guid has been set. Also, provide the infrastructure for propagating port-management events (client-reregister, etc) to slaves. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/eq.c | 237 ++++++++++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + include/linux/mlx4/device.h | 28 ++++ 3 files changed, 250 insertions(+), 16 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 99a04648fab0..c425826b1fc0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -200,6 +200,196 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave, slave_event(dev, slave, eqe); } +int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_eqe eqe; + + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave]; + + if (!s_slave->active) + return 0; + + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE; + eqe.event.port_mgmt_change.port = port; + + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_pkey_eqe); + +int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_eqe eqe; + + /*don't send if we don't have the that slave */ + if (dev->num_vfs < slave) + return 0; + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO; + eqe.event.port_mgmt_change.port = port; + + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_guid_change_eqe); + +int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, + u8 port_subtype_change) +{ + struct mlx4_eqe eqe; + + /*don't send if we don't have the that slave */ + if (dev->num_vfs < slave) + return 0; + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE; + eqe.subtype = port_subtype_change; + eqe.event.port_change.port = cpu_to_be32(port << 28); + + mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__, + port_subtype_change, slave, port); + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe); + +enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; + if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return SLAVE_PORT_DOWN; + } + return s_state[slave].port_state[port]; +} +EXPORT_SYMBOL(mlx4_get_slave_port_state); + +static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, + enum slave_port_state state) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; + + if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return -1; + } + s_state[slave].port_state[port] = state; + + return 0; +} + +static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) +{ + int i; + enum slave_port_gen_event gen_event; + + for (i = 0; i < dev->num_slaves; i++) + set_and_calc_slave_port_state(dev, i, port, event, &gen_event); +} +/************************************************************************** + The function get as input the new event to that port, + and according to the prev state change the slave's port state. + The events are: + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + MLX4_PORT_STATE_DEV_EVENT_PORT_UP + MLX4_PORT_STATE_IB_EVENT_GID_VALID + MLX4_PORT_STATE_IB_EVENT_GID_INVALID +***************************************************************************/ +int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, + u8 port, int event, + enum slave_port_gen_event *gen_event) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *ctx = NULL; + unsigned long flags; + int ret = -1; + enum slave_port_state cur_state = + mlx4_get_slave_port_state(dev, slave, port); + + *gen_event = SLAVE_PORT_GEN_EVENT_NONE; + + if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return ret; + } + + ctx = &priv->mfunc.master.slave_state[slave]; + spin_lock_irqsave(&ctx->lock, flags); + + mlx4_dbg(dev, "%s: slave: %d, current state: %d new event :%d\n", + __func__, slave, cur_state, event); + + switch (cur_state) { + case SLAVE_PORT_DOWN: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event) + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PENDING_UP); + break; + case SLAVE_PENDING_UP: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_DOWN); + else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_UP); + *gen_event = SLAVE_PORT_GEN_EVENT_UP; + } + break; + case SLAVE_PORT_UP: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_DOWN); + *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; + } else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID == + event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PENDING_UP); + *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; + } + break; + default: + pr_err("%s: BUG!!! UNKNOWN state: " + "slave:%d, port:%d\n", __func__, slave, port); + goto out; + } + ret = mlx4_get_slave_port_state(dev, slave, port); + mlx4_dbg(dev, "%s: slave: %d, current state: %d new event" + " :%d gen_event: %d\n", + __func__, slave, cur_state, event, *gen_event); + +out: + spin_unlock_irqrestore(&ctx->lock, flags); + return ret; +} + +EXPORT_SYMBOL(set_and_calc_slave_port_state); + +int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) +{ + struct mlx4_eqe eqe; + + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO; + eqe.event.port_mgmt_change.port = port; + eqe.event.port_mgmt_change.params.port_info.changed_attr = + cpu_to_be32((u32) attr); + + slave_event(dev, ALL_SLAVES, &eqe); + return 0; +} +EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev); + void mlx4_master_handle_slave_flr(struct work_struct *work) { struct mlx4_mfunc_master_ctx *master = @@ -251,6 +441,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) u32 flr_slave; u8 update_slave_state; int i; + enum slave_port_gen_event gen_event; while ((eqe = next_eqe_sw(eq))) { /* @@ -347,35 +538,49 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) case MLX4_EVENT_TYPE_PORT_CHANGE: port = be32_to_cpu(eqe->event.port_change.port) >> 28; if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { - mlx4_dispatch_event(dev, - MLX4_DEV_EVENT_PORT_DOWN, + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN, port); mlx4_priv(dev)->sense.do_sense_port[port] = 1; - if (mlx4_is_master(dev)) - /*change the state of all slave's port - * to down:*/ - for (i = 0; i < dev->num_slaves; i++) { - mlx4_dbg(dev, "%s: Sending " - "MLX4_PORT_CHANGE_SUBTYPE_DOWN" + if (!mlx4_is_master(dev)) + break; + for (i = 0; i < dev->num_slaves; i++) { + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { + if (i == mlx4_master_func_num(dev)) + continue; + mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN" " to slave: %d, port:%d\n", __func__, i, port); - if (i == dev->caps.function) - continue; mlx4_slave_event(dev, i, eqe); + } else { /* IB port */ + set_and_calc_slave_port_state(dev, i, port, + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + &gen_event); + /*we can be in pending state, then do not send port_down event*/ + if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) { + if (i == mlx4_master_func_num(dev)) + continue; + mlx4_slave_event(dev, i, eqe); + } } + } } else { - mlx4_dispatch_event(dev, - MLX4_DEV_EVENT_PORT_UP, - port); + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port); + mlx4_priv(dev)->sense.do_sense_port[port] = 0; - if (mlx4_is_master(dev)) { + if (!mlx4_is_master(dev)) + break; + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) for (i = 0; i < dev->num_slaves; i++) { - if (i == dev->caps.function) + if (i == mlx4_master_func_num(dev)) continue; mlx4_slave_event(dev, i, eqe); } - } + else /* IB port */ + /* port-up event will be sent to a slave when the + * slave's alias-guid is set. This is done in alias_GUID.c + */ + set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP); } break; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 7d27c3158d0c..23f74759f403 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -452,6 +452,7 @@ struct mlx4_slave_state { /*initialized via the kzalloc*/ u8 is_slave_going_down; u32 cookie; + enum slave_port_state port_state[MLX4_MAX_PORTS + 1]; }; struct slave_list { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9803fd5d3dba..380e01671d1e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -192,6 +192,25 @@ enum { MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, }; +enum slave_port_state { + SLAVE_PORT_DOWN = 0, + SLAVE_PENDING_UP, + SLAVE_PORT_UP, +}; + +enum slave_port_gen_event { + SLAVE_PORT_GEN_EVENT_DOWN = 0, + SLAVE_PORT_GEN_EVENT_UP, + SLAVE_PORT_GEN_EVENT_NONE, +}; + +enum slave_port_state_event { + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + MLX4_PORT_STATE_DEV_EVENT_PORT_UP, + MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, + MLX4_PORT_STATE_IB_EVENT_GID_INVALID, +}; + enum { MLX4_PERM_LOCAL_READ = 1 << 10, MLX4_PERM_LOCAL_WRITE = 1 << 11, @@ -945,4 +964,13 @@ void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); +int mlx4_is_slave_active(struct mlx4_dev *dev, int slave); +int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port); +int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port); +int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr); +int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change); +enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port); +int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event); + + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From a0c64a17aba88c29d55ba989b96ac6ccb1268f0a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:49 +0000 Subject: mlx4: Add alias_guid mechanism For IB ports, we paravirtualize the GUID at index 0 on slaves. The GUID at index 0 seen by a slave is the actual GUID occupying the GUID table at the slave-id index. The driver, by default, requests at startup time that subnet manager populate its entire guid table with GUIDs. These guids are then mapped (paravirtualized) to the slaves, and appear for each slave as its GUID at index 0. Until each slave has such a guid, its port status is DOWN. The guid table is cached to support special QP paravirtualization, and event propagation to slaves on guid change (we test to see if the guid really changed before propagating an event to the slave). To support this caching, add capability to __mlx4_ib_query_gid() to obtain the network view (i.e., physical view) gid at index X, not just the host (paravirtualized) view. Based on a patch from Erez Shitrit Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/Makefile | 2 +- drivers/infiniband/hw/mlx4/alias_GUID.c | 688 +++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx4/mad.c | 19 +- drivers/infiniband/hw/mlx4/main.c | 37 +- drivers/infiniband/hw/mlx4/mlx4_ib.h | 74 ++++ drivers/net/ethernet/mellanox/mlx4/cmd.c | 6 +- 6 files changed, 816 insertions(+), 10 deletions(-) create mode 100644 drivers/infiniband/hw/mlx4/alias_GUID.c (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile index bf0aa901c313..31d4c8aac679 100644 --- a/drivers/infiniband/hw/mlx4/Makefile +++ b/drivers/infiniband/hw/mlx4/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o -mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o +mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c new file mode 100644 index 000000000000..ef6d356927c3 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2012 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + /***********************************************************/ +/*This file support the handling of the Alias GUID feature. */ +/***********************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mlx4_ib.h" + +/* +The driver keeps the current state of all guids, as they are in the HW. +Whenever we receive an smp mad GUIDInfo record, the data will be cached. +*/ + +struct mlx4_alias_guid_work_context { + u8 port; + struct mlx4_ib_dev *dev ; + struct ib_sa_query *sa_query; + struct completion done; + int query_id; + struct list_head list; + int block_num; +}; + +struct mlx4_next_alias_guid_work { + u8 port; + u8 block_num; + struct mlx4_sriov_alias_guid_info_rec_det rec_det; +}; + + +void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num, + u8 port_num, u8 *p_data) +{ + int i; + u64 guid_indexes; + int slave_id; + int port_index = port_num - 1; + + if (!mlx4_is_master(dev->dev)) + return; + + guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. + ports_guid[port_num - 1]. + all_rec_per_port[block_num].guid_indexes); + pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes); + + for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { + /* The location of the specific index starts from bit number 4 + * until bit num 11 */ + if (test_bit(i + 4, (unsigned long *)&guid_indexes)) { + slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; + if (slave_id >= dev->dev->num_slaves) { + pr_debug("The last slave: %d\n", slave_id); + return; + } + + /* cache the guid: */ + memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id], + &p_data[i * GUID_REC_SIZE], + GUID_REC_SIZE); + } else + pr_debug("Guid number: %d in block: %d" + " was not updated\n", i, block_num); + } +} + +static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index) +{ + if (index >= NUM_ALIAS_GUID_PER_PORT) { + pr_err("%s: ERROR: asked for index:%d\n", __func__, index); + return (__force __be64) ((u64) 0xFFFFFFFFFFFFFFFFUL); + } + return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index]; +} + + +static ib_sa_comp_mask get_aguid_comp_mask_from_ix(int index) +{ + return IB_SA_COMP_MASK(4 + index); +} + +/* + * Whenever new GUID is set/unset (guid table change) create event and + * notify the relevant slave (master also should be notified). + * If the GUID value is not as we have in the cache the slave will not be + * updated; in this case it waits for the smp_snoop or the port management + * event to call the function and to update the slave. + * block_number - the index of the block (16 blocks available) + * port_number - 1 or 2 + */ +void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, + int block_num, u8 port_num, + u8 *p_data) +{ + int i; + u64 guid_indexes; + int slave_id; + enum slave_port_state new_state; + enum slave_port_state prev_state; + __be64 tmp_cur_ag, form_cache_ag; + enum slave_port_gen_event gen_event; + + if (!mlx4_is_master(dev->dev)) + return; + + guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. + ports_guid[port_num - 1]. + all_rec_per_port[block_num].guid_indexes); + pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes); + + /*calculate the slaves and notify them*/ + for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { + /* the location of the specific index runs from bits 4..11 */ + if (!(test_bit(i + 4, (unsigned long *)&guid_indexes))) + continue; + + slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; + if (slave_id >= dev->dev->num_slaves) + return; + tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; + form_cache_ag = get_cached_alias_guid(dev, port_num, + (NUM_ALIAS_GUID_IN_REC * block_num) + i); + /* + * Check if guid is not the same as in the cache, + * If it is different, wait for the snoop_smp or the port mgmt + * change event to update the slave on its port state change + */ + if (tmp_cur_ag != form_cache_ag) + continue; + mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num); + + /*2 cases: Valid GUID, and Invalid Guid*/ + + if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/ + prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num); + new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num, + MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, + &gen_event); + pr_debug("slave: %d, port: %d prev_port_state: %d," + " new_port_state: %d, gen_event: %d\n", + slave_id, port_num, prev_state, new_state, gen_event); + if (gen_event == SLAVE_PORT_GEN_EVENT_UP) { + pr_debug("sending PORT_UP event to slave: %d, port: %d\n", + slave_id, port_num); + mlx4_gen_port_state_change_eqe(dev->dev, slave_id, + port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE); + } + } else { /* request to invalidate GUID */ + set_and_calc_slave_port_state(dev->dev, slave_id, port_num, + MLX4_PORT_STATE_IB_EVENT_GID_INVALID, + &gen_event); + pr_debug("sending PORT DOWN event to slave: %d, port: %d\n", + slave_id, port_num); + mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num, + MLX4_PORT_CHANGE_SUBTYPE_DOWN); + } + } +} + +static void aliasguid_query_handler(int status, + struct ib_sa_guidinfo_rec *guid_rec, + void *context) +{ + struct mlx4_ib_dev *dev; + struct mlx4_alias_guid_work_context *cb_ctx = context; + u8 port_index ; + int i; + struct mlx4_sriov_alias_guid_info_rec_det *rec; + unsigned long flags, flags1; + + if (!context) + return; + + dev = cb_ctx->dev; + port_index = cb_ctx->port - 1; + rec = &dev->sriov.alias_guid.ports_guid[port_index]. + all_rec_per_port[cb_ctx->block_num]; + + if (status) { + rec->status = MLX4_GUID_INFO_STATUS_IDLE; + pr_debug("(port: %d) failed: status = %d\n", + cb_ctx->port, status); + goto out; + } + + if (guid_rec->block_num != cb_ctx->block_num) { + pr_err("block num mismatch: %d != %d\n", + cb_ctx->block_num, guid_rec->block_num); + goto out; + } + + pr_debug("lid/port: %d/%d, block_num: %d\n", + be16_to_cpu(guid_rec->lid), cb_ctx->port, + guid_rec->block_num); + + rec = &dev->sriov.alias_guid.ports_guid[port_index]. + all_rec_per_port[guid_rec->block_num]; + + rec->status = MLX4_GUID_INFO_STATUS_SET; + rec->method = MLX4_GUID_INFO_RECORD_SET; + + for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) { + __be64 tmp_cur_ag; + tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE]; + /* check if the SM didn't assign one of the records. + * if it didn't, if it was not sysadmin request: + * ask the SM to give a new GUID, (instead of the driver request). + */ + if (tmp_cur_ag == MLX4_NOT_SET_GUID) { + mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in " + "block_num: %d was declined by SM, " + "ownership by %d (0 = driver, 1=sysAdmin," + " 2=None)\n", __func__, i, + guid_rec->block_num, rec->ownership); + if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) { + /* if it is driver assign, asks for new GUID from SM*/ + *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] = + MLX4_NOT_SET_GUID; + + /* Mark the record as not assigned, and let it + * be sent again in the next work sched.*/ + rec->status = MLX4_GUID_INFO_STATUS_IDLE; + rec->guid_indexes |= get_aguid_comp_mask_from_ix(i); + } + } else { + /* properly assigned record. */ + /* We save the GUID we just got from the SM in the + * admin_guid in order to be persistent, and in the + * request from the sm the process will ask for the same GUID */ + if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN && + tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) { + /* the sysadmin assignment failed.*/ + mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set" + " admin guid after SysAdmin " + "configuration. " + "Record num %d in block_num:%d " + "was declined by SM, " + "new val(0x%llx) was kept\n", + __func__, i, + guid_rec->block_num, + be64_to_cpu(*(__be64 *) & + rec->all_recs[i * GUID_REC_SIZE])); + } else { + memcpy(&rec->all_recs[i * GUID_REC_SIZE], + &guid_rec->guid_info_list[i * GUID_REC_SIZE], + GUID_REC_SIZE); + } + } + } + /* + The func is call here to close the cases when the + sm doesn't send smp, so in the sa response the driver + notifies the slave. + */ + mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num, + cb_ctx->port, + guid_rec->guid_info_list); +out: + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + if (!dev->sriov.is_going_down) + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq, + &dev->sriov.alias_guid.ports_guid[port_index]. + alias_guid_work, 0); + if (cb_ctx->sa_query) { + list_del(&cb_ctx->list); + kfree(cb_ctx); + } else + complete(&cb_ctx->done); + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index) +{ + int i; + u64 cur_admin_val; + ib_sa_comp_mask comp_mask = 0; + + dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status + = MLX4_GUID_INFO_STATUS_IDLE; + dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method + = MLX4_GUID_INFO_RECORD_SET; + + /* calculate the comp_mask for that record.*/ + for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { + cur_admin_val = + *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1]. + all_rec_per_port[index].all_recs[GUID_REC_SIZE * i]; + /* + check the admin value: if it's for delete (~00LL) or + it is the first guid of the first record (hw guid) or + the records is not in ownership of the sysadmin and the sm doesn't + need to assign GUIDs, then don't put it up for assignment. + */ + if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val || + (!index && !i) || + MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid. + ports_guid[port - 1].all_rec_per_port[index].ownership) + continue; + comp_mask |= get_aguid_comp_mask_from_ix(i); + } + dev->sriov.alias_guid.ports_guid[port - 1]. + all_rec_per_port[index].guid_indexes = comp_mask; +} + +static int set_guid_rec(struct ib_device *ibdev, + u8 port, int index, + struct mlx4_sriov_alias_guid_info_rec_det *rec_det) +{ + int err; + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct ib_sa_guidinfo_rec guid_info_rec; + ib_sa_comp_mask comp_mask; + struct ib_port_attr attr; + struct mlx4_alias_guid_work_context *callback_context; + unsigned long resched_delay, flags, flags1; + struct list_head *head = + &dev->sriov.alias_guid.ports_guid[port - 1].cb_list; + + err = __mlx4_ib_query_port(ibdev, port, &attr, 1); + if (err) { + pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n", + err, port); + return err; + } + /*check the port was configured by the sm, otherwise no need to send */ + if (attr.state != IB_PORT_ACTIVE) { + pr_debug("port %d not active...rescheduling\n", port); + resched_delay = 5 * HZ; + err = -EAGAIN; + goto new_schedule; + } + + callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL); + if (!callback_context) { + err = -ENOMEM; + resched_delay = HZ * 5; + goto new_schedule; + } + callback_context->port = port; + callback_context->dev = dev; + callback_context->block_num = index; + + memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); + + guid_info_rec.lid = cpu_to_be16(attr.lid); + guid_info_rec.block_num = index; + + memcpy(guid_info_rec.guid_info_list, rec_det->all_recs, + GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC); + comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM | + rec_det->guid_indexes; + + init_completion(&callback_context->done); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + list_add_tail(&callback_context->list, head); + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + + callback_context->query_id = + ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client, + ibdev, port, &guid_info_rec, + comp_mask, rec_det->method, 1000, + GFP_KERNEL, aliasguid_query_handler, + callback_context, + &callback_context->sa_query); + if (callback_context->query_id < 0) { + pr_debug("ib_sa_guid_info_rec_query failed, query_id: " + "%d. will reschedule to the next 1 sec.\n", + callback_context->query_id); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + list_del(&callback_context->list); + kfree(callback_context); + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + resched_delay = 1 * HZ; + err = -EAGAIN; + goto new_schedule; + } + err = 0; + goto out; + +new_schedule: + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + invalidate_guid_record(dev, port, index); + if (!dev->sriov.is_going_down) { + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq, + &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work, + resched_delay); + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); + +out: + return err; +} + +void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port) +{ + int i; + unsigned long flags, flags1; + + pr_debug("port %d\n", port); + + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++) + invalidate_guid_record(dev, port, i); + + if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) { + /* + make sure no work waits in the queue, if the work is already + queued(not on the timer) the cancel will fail. That is not a problem + because we just want the work started. + */ + __cancel_delayed_work(&dev->sriov.alias_guid. + ports_guid[port - 1].alias_guid_work); + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq, + &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work, + 0); + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +/* The function returns the next record that was + * not configured (or failed to be configured) */ +static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port, + struct mlx4_next_alias_guid_work *rec) +{ + int j; + unsigned long flags; + + for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); + if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status == + MLX4_GUID_INFO_STATUS_IDLE) { + memcpy(&rec->rec_det, + &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j], + sizeof (struct mlx4_sriov_alias_guid_info_rec_det)); + rec->port = port; + rec->block_num = j; + dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status = + MLX4_GUID_INFO_STATUS_PENDING; + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); + return 0; + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); + } + return -ENOENT; +} + +static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port, + int rec_index, + struct mlx4_sriov_alias_guid_info_rec_det *rec_det) +{ + dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes = + rec_det->guid_indexes; + memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs, + rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE); + dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status = + rec_det->status; +} + +static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port) +{ + int j; + struct mlx4_sriov_alias_guid_info_rec_det rec_det ; + + for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) { + memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE); + rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) | + IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 | + IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 | + IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 | + IB_SA_GUIDINFO_REC_GID7; + rec_det.status = MLX4_GUID_INFO_STATUS_IDLE; + set_administratively_guid_record(dev, port, j, &rec_det); + } +} + +static void alias_guid_work(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + int ret = 0; + struct mlx4_next_alias_guid_work *rec; + struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port = + container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det, + alias_guid_work); + struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent; + struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid, + struct mlx4_ib_sriov, + alias_guid); + struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov); + + rec = kzalloc(sizeof *rec, GFP_KERNEL); + if (!rec) { + pr_err("alias_guid_work: No Memory\n"); + return; + } + + pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1); + ret = get_next_record_to_update(dev, sriov_alias_port->port, rec); + if (ret) { + pr_debug("No more records to update.\n"); + goto out; + } + + set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num, + &rec->rec_det); + +out: + kfree(rec); +} + + +void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port) +{ + unsigned long flags, flags1; + + if (!mlx4_is_master(dev->dev)) + return; + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + if (!dev->sriov.is_going_down) { + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq, + &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0); + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) +{ + int i; + struct mlx4_ib_sriov *sriov = &dev->sriov; + struct mlx4_alias_guid_work_context *cb_ctx; + struct mlx4_sriov_alias_guid_port_rec_det *det; + struct ib_sa_query *sa_query; + unsigned long flags; + + for (i = 0 ; i < dev->num_ports; i++) { + cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work); + det = &sriov->alias_guid.ports_guid[i]; + spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); + while (!list_empty(&det->cb_list)) { + cb_ctx = list_entry(det->cb_list.next, + struct mlx4_alias_guid_work_context, + list); + sa_query = cb_ctx->sa_query; + cb_ctx->sa_query = NULL; + list_del(&cb_ctx->list); + spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); + ib_sa_cancel_query(cb_ctx->query_id, sa_query); + wait_for_completion(&cb_ctx->done); + kfree(cb_ctx); + spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); + } + spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); + } + for (i = 0 ; i < dev->num_ports; i++) { + flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + } + ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); + kfree(dev->sriov.alias_guid.sa_client); +} + +int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) +{ + char alias_wq_name[15]; + int ret = 0; + int i, j, k; + union ib_gid gid; + + if (!mlx4_is_master(dev->dev)) + return 0; + dev->sriov.alias_guid.sa_client = + kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL); + if (!dev->sriov.alias_guid.sa_client) + return -ENOMEM; + + ib_sa_register_client(dev->sriov.alias_guid.sa_client); + + spin_lock_init(&dev->sriov.alias_guid.ag_work_lock); + + for (i = 1; i <= dev->num_ports; ++i) { + if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) { + ret = -EFAULT; + goto err_unregister; + } + } + + for (i = 0 ; i < dev->num_ports; i++) { + memset(&dev->sriov.alias_guid.ports_guid[i], 0, + sizeof (struct mlx4_sriov_alias_guid_port_rec_det)); + /*Check if the SM doesn't need to assign the GUIDs*/ + for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { + if (mlx4_ib_sm_guid_assign) { + dev->sriov.alias_guid.ports_guid[i]. + all_rec_per_port[j]. + ownership = MLX4_GUID_DRIVER_ASSIGN; + continue; + } + dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j]. + ownership = MLX4_GUID_NONE_ASSIGN; + /*mark each val as it was deleted, + till the sysAdmin will give it valid val*/ + for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) { + *(__be64 *)&dev->sriov.alias_guid.ports_guid[i]. + all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] = + cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL); + } + } + INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list); + /*prepare the records, set them to be allocated by sm*/ + for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) + invalidate_guid_record(dev, i + 1, j); + + dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid; + dev->sriov.alias_guid.ports_guid[i].port = i; + if (mlx4_ib_sm_guid_assign) + set_all_slaves_guids(dev, i); + + snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i); + dev->sriov.alias_guid.ports_guid[i].wq = + create_singlethread_workqueue(alias_wq_name); + if (!dev->sriov.alias_guid.ports_guid[i].wq) { + ret = -ENOMEM; + goto err_thread; + } + INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work, + alias_guid_work); + } + return 0; + +err_thread: + for (--i; i >= 0; i--) { + destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + dev->sriov.alias_guid.ports_guid[i].wq = NULL; + } + +err_unregister: + ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); + kfree(dev->sriov.alias_guid.sa_client); + dev->sriov.alias_guid.sa_client = NULL; + pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret); + return ret; +} diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 2f13894299ee..b8cb25ebce50 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -791,8 +791,10 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num) { - /* re-configure the mcg's */ + /* re-configure the alias-guid and mcg's */ if (mlx4_is_master(dev->dev)) { + mlx4_ib_invalidate_all_guid_record(dev, port_num); + if (!dev->sriov.is_going_down) mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0); } @@ -1808,9 +1810,20 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) return 0; } + err = mlx4_ib_init_alias_guid_service(dev); + if (err) { + mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n"); + goto paravirt_err; + } + mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n", dev->dev->caps.sqp_demux); for (i = 0; i < dev->num_ports; i++) { + union ib_gid gid; + err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1); + if (err) + goto demux_err; + dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, &dev->sriov.sqps[i]); if (err) @@ -1828,6 +1841,9 @@ demux_err: mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); --i; } + mlx4_ib_destroy_alias_guid_service(dev); + +paravirt_err: mlx4_ib_cm_paravirt_clean(dev, -1); return err; @@ -1854,5 +1870,6 @@ void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev) } mlx4_ib_cm_paravirt_clean(dev, -1); + mlx4_ib_destroy_alias_guid_service(dev); } } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b959fe4665dd..7d97578fbbaa 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -59,6 +59,10 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); +int mlx4_ib_sm_guid_assign = 1; +module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); +MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)"); + static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -349,12 +353,15 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, return __mlx4_ib_query_port(ibdev, port, props, 0); } -static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid) +int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; + struct mlx4_ib_dev *dev = to_mdev(ibdev); + int clear = 0; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); @@ -365,18 +372,29 @@ static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); - err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, port, - NULL, NULL, in_mad, out_mad); + if (mlx4_is_mfunc(dev->dev) && netw_view) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; + + err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw, out_mad->data + 8, 8); + if (mlx4_is_mfunc(dev->dev) && !netw_view) { + if (index) { + /* For any index > 0, return the null guid */ + err = 0; + clear = 1; + goto out; + } + } + init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); - err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, port, + err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -384,6 +402,8 @@ static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); out: + if (clear) + memset(gid->raw + 8, 0, 8); kfree(in_mad); kfree(out_mad); return err; @@ -403,7 +423,7 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) - return __mlx4_ib_query_gid(ibdev, port, index, gid); + return __mlx4_ib_query_gid(ibdev, port, index, gid, 0); else return iboe_query_gid(ibdev, port, index, gid); } @@ -1566,6 +1586,11 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_PORT_UP: if (p > ibdev->num_ports) return; + if (mlx4_is_master(dev) && + rdma_port_get_link_layer(&ibdev->ib_dev, p) == + IB_LINK_LAYER_INFINIBAND) { + mlx4_ib_invalidate_all_guid_record(ibdev, p); + } ibev.event = IB_EVENT_PORT_ACTIVE; break; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7476e2439f6b..f3f75f8229a7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -64,6 +65,9 @@ enum { #define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1) #define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)) +/*module param to indicate if SM assigns the alias_GUID*/ +extern int mlx4_ib_sm_guid_assign; + struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; @@ -277,6 +281,57 @@ struct mlx4_ib_ah { union mlx4_ext_av av; }; +/****************************************/ +/* alias guid support */ +/****************************************/ +#define NUM_PORT_ALIAS_GUID 2 +#define NUM_ALIAS_GUID_IN_REC 8 +#define NUM_ALIAS_GUID_REC_IN_PORT 16 +#define GUID_REC_SIZE 8 +#define NUM_ALIAS_GUID_PER_PORT 128 +#define MLX4_NOT_SET_GUID (0x00LL) +#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL)) + +enum mlx4_guid_alias_rec_status { + MLX4_GUID_INFO_STATUS_IDLE, + MLX4_GUID_INFO_STATUS_SET, + MLX4_GUID_INFO_STATUS_PENDING, +}; + +enum mlx4_guid_alias_rec_ownership { + MLX4_GUID_DRIVER_ASSIGN, + MLX4_GUID_SYSADMIN_ASSIGN, + MLX4_GUID_NONE_ASSIGN, /*init state of each record*/ +}; + +enum mlx4_guid_alias_rec_method { + MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET, + MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE, +}; + +struct mlx4_sriov_alias_guid_info_rec_det { + u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC]; + ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/ + enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/ + u8 method; /*set or delete*/ + enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/ +}; + +struct mlx4_sriov_alias_guid_port_rec_det { + struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT]; + struct workqueue_struct *wq; + struct delayed_work alias_guid_work; + u8 port; + struct mlx4_sriov_alias_guid *parent; + struct list_head cb_list; +}; + +struct mlx4_sriov_alias_guid { + struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS]; + spinlock_t ag_work_lock; + struct ib_sa_client *sa_client; +}; + struct mlx4_ib_demux_work { struct work_struct work; struct mlx4_ib_dev *dev; @@ -349,6 +404,8 @@ struct mlx4_ib_sriov { spinlock_t going_down_lock; int is_going_down; + struct mlx4_sriov_alias_guid alias_guid; + /* CM paravirtualization fields */ struct list_head cm_list; spinlock_t id_map_lock; @@ -555,6 +612,9 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey, int netw_view); +int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid, int netw_view); + int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, u8 *mac, int *is_mcast, u8 port); @@ -606,4 +666,18 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev); void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id); +/* alias guid support */ +void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port); +int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev); +void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev); +void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port); + +void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, + int block_num, + u8 port_num, u8 *p_data); + +void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, + int block_num, u8 port_num, + u8 *p_data); + #endif /* MLX4_IB_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index a13d8a69b3bc..ea5c884ab899 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -674,8 +674,10 @@ static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table, static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf) { - /* will be modified when add alias_guid feature */ - return IB_PORT_DOWN; + if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP) + return IB_PORT_ACTIVE; + else + return IB_PORT_DOWN; } static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, -- cgit v1.2.3 From 2a4fae148cf4b60e73faf0a427302697917409d9 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:50 +0000 Subject: IB/mlx4: Propagate P_Key and guid change port management events to slaves P_Key change and guid change events are not of interest to all slaves, but only to those slaves which "see" the table slots whose contents have change. For example, if the guid at port 1, index 5 has changed in the PPF, we wish to propagate the gid-change event only to the function which has that guid index mapped to its port/guid table (in this case it is slave #5). Other functions should not get the event, since the event does not affect them. Similarly with P_Keys -- P_Key change events are forwarded only to slaves which have that P_Key index mapped to their virtual P_Key table. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 162 ++++++++++++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/fw.c | 6 ++ 2 files changed, 161 insertions(+), 7 deletions(-) (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index b8cb25ebce50..591c2891159b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -54,6 +54,15 @@ enum { #define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1) #define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3) + /* Port mgmt change event handling */ + +#define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr) +#define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask) +#define NUM_IDX_IN_PKEY_TBL_BLK 32 +#define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */ +#define GUID_TBL_BLK_NUM_ENTRIES 8 +#define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES) + struct mlx4_mad_rcv_buf { struct ib_grh grh; u8 payload[256]; @@ -76,6 +85,9 @@ struct mlx4_rcv_tunnel_mad { } __packed; static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num); +static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num); +static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, + int block, u32 change_bitmap); __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) { @@ -220,8 +232,7 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, handle_client_rereg_event(dev, port_num); if (prev_lid != lid) - mlx4_ib_dispatch_event(dev, port_num, - IB_EVENT_LID_CHANGE); + handle_lid_change_event(dev, port_num); break; case IB_SMP_ATTR_PKEY_TABLE: @@ -231,6 +242,9 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, break; } + /* at this point, we are running in the master. + * Slaves do not receive SMPs. + */ bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF; base = (__be16 *) &(((struct ib_smp *)mad)->data[0]); pkey_change_bitmap = 0; @@ -248,10 +262,13 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, "block=0x%x, change_bitmap=0x%x\n", port_num, bn, pkey_change_bitmap); - if (pkey_change_bitmap) + if (pkey_change_bitmap) { mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_PKEY_CHANGE); - + if (!dev->sriov.is_going_down) + __propagate_pkey_ev(dev, port_num, bn, + pkey_change_bitmap); + } break; case IB_SMP_ATTR_GUID_INFO: @@ -259,12 +276,56 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, if (!mlx4_is_master(dev->dev)) mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_GID_CHANGE); + /*if master, notify relevant slaves*/ + if (mlx4_is_master(dev->dev) && + !dev->sriov.is_going_down) { + bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod); + mlx4_ib_update_cache_on_guid_change(dev, bn, port_num, + (u8 *)(&((struct ib_smp *)mad)->data)); + mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num, + (u8 *)(&((struct ib_smp *)mad)->data)); + } break; + default: break; } } +static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, + int block, u32 change_bitmap) +{ + int i, ix, slave, err; + int have_event = 0; + + for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) { + if (slave == mlx4_master_func_num(dev->dev)) + continue; + if (!mlx4_is_slave_active(dev->dev, slave)) + continue; + + have_event = 0; + for (i = 0; i < 32; i++) { + if (!(change_bitmap & (1 << i))) + continue; + for (ix = 0; + ix < dev->dev->caps.pkey_table_len[port_num]; ix++) { + if (dev->pkeys.virt2phys_pkey[slave][port_num - 1] + [ix] == i + 32 * block) { + err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num); + pr_debug("propagate_pkey_ev: slave %d," + " port %d, ix %d (%d)\n", + slave, port_num, ix, err); + have_event = 1; + break; + } + } + if (have_event) + break; + } + } +} + static void node_desc_override(struct ib_device *dev, struct ib_mad *mad) { @@ -789,18 +850,90 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) } } +static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num) +{ + mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE); + + if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) + mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num, + MLX4_EQ_PORT_INFO_LID_CHANGE_MASK); +} + static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num) { /* re-configure the alias-guid and mcg's */ if (mlx4_is_master(dev->dev)) { mlx4_ib_invalidate_all_guid_record(dev, port_num); - if (!dev->sriov.is_going_down) + if (!dev->sriov.is_going_down) { mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0); + mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num, + MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK); + } } mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER); } +static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, + struct mlx4_eqe *eqe) +{ + __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe), + GET_MASK_FROM_EQE(eqe)); +} + +static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num, + u32 guid_tbl_blk_num, u32 change_bitmap) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + u16 i; + + if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev)) + return; + + in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) { + mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n"); + goto out; + } + + guid_tbl_blk_num *= 4; + + for (i = 0; i < 4; i++) { + if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff))) + continue; + memset(in_mad, 0, sizeof *in_mad); + memset(out_mad, 0, sizeof *out_mad); + + in_mad->base_version = 1; + in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + in_mad->class_version = 1; + in_mad->method = IB_MGMT_METHOD_GET; + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i); + + if (mlx4_MAD_IFC(dev, + MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW, + port_num, NULL, NULL, in_mad, out_mad)) { + mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n"); + goto out; + } + + mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i, + port_num, + (u8 *)(&((struct ib_smp *)out_mad)->data)); + mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i, + port_num, + (u8 *)(&((struct ib_smp *)out_mad)->data)); + } + +out: + kfree(in_mad); + kfree(out_mad); + return; +} + void handle_port_mgmt_change_event(struct work_struct *work) { struct ib_event_work *ew = container_of(work, struct ib_event_work, work); @@ -808,6 +941,8 @@ void handle_port_mgmt_change_event(struct work_struct *work) struct mlx4_eqe *eqe = &(ew->ib_eqe); u8 port = eqe->event.port_mgmt_change.port; u32 changed_attr; + u32 tbl_block; + u32 change_bitmap; switch (eqe->subtype) { case MLX4_DEV_PMC_SUBTYPE_PORT_INFO: @@ -823,11 +958,16 @@ void handle_port_mgmt_change_event(struct work_struct *work) /* Check if it is a lid change event */ if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK) - mlx4_ib_dispatch_event(dev, port, IB_EVENT_LID_CHANGE); + handle_lid_change_event(dev, port); /* Generate GUID changed event */ - if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) + if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + /*if master, notify all slaves*/ + if (mlx4_is_master(dev->dev)) + mlx4_gen_slaves_port_mgt_ev(dev->dev, port, + MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK); + } if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) handle_client_rereg_event(dev, port); @@ -835,11 +975,19 @@ void handle_port_mgmt_change_event(struct work_struct *work) case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE: mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); + if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) + propagate_pkey_ev(dev, port, eqe); break; case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: /* paravirtualized master's guid is guid 0 -- does not change */ if (!mlx4_is_master(dev->dev)) mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + /*if master, notify relevant slaves*/ + else if (!dev->sriov.is_going_down) { + tbl_block = GET_BLK_PTR_FROM_EQE(eqe); + change_bitmap = GET_MASK_FROM_EQE(eqe); + handle_slaves_guid_change(dev, port, tbl_block, change_bitmap); + } break; default: pr_warn("Unsupported subtype 0x%x for " diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 35be90178bef..b78803937897 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -729,6 +729,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + u64 flags; int err = 0; u8 field; @@ -737,6 +738,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; + /* add port mng change event capability unconditionally to slaves */ + MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; + MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + /* For guests, report Blueflame disabled */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET); field &= 0x7f; -- cgit v1.2.3 From efcd235d736ab05ef2b29d7fe1493a2f52b07b66 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:52 +0000 Subject: net/mlx4_core: Adjustments to SET_PORT for IB SR-IOV 1. Slaves may not set the IS_SM capability for the port. 2. DEV_MGMT may not be set in multifunction mode. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/port.c | 10 ++++++++++ include/linux/mlx4/device.h | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index e36dd0f2fa73..4c51b05efa28 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -732,6 +732,16 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, new_cap_mask = ((__be32 *) inbox->buf)[1]; } + /* slave may not set the IS_SM capability for the port */ + if (slave != mlx4_master_func_num(dev) && + (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM)) + return -EINVAL; + + /* No DEV_MGMT in multifunc mode */ + if (mlx4_is_mfunc(dev) && + (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP)) + return -EINVAL; + agg_cap_mask = 0; slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 380e01671d1e..b3f9b4500b9b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -53,6 +53,11 @@ enum { MLX4_FLAG_SRIOV = 1 << 4, }; +enum { + MLX4_PORT_CAP_IS_SM = 1 << 1, + MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19, +}; + enum { MLX4_MAX_PORTS = 2, MLX4_MAX_PORT_PKEYS = 128 -- cgit v1.2.3 From 980e90010f15362b3b02ed4875ef2758aee3de72 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:53 +0000 Subject: mlx4_core: INIT/CLOSE port logic for IB ports in SR-IOV mode Normally, INIT_PORT and CLOSE_PORT are invoked when special QP0 transitions to RTR, or transitions to ERR/RESET respectively. In SR-IOV mode, however, the master is also paravirtualized. This in turn requires that we not do INIT_PORT until the entire QP0 path (real QP0 and proxy QP0) is ready to receive. When the real QP0 goes down, we should indicate that the port is not active. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/fw.c | 71 +++++++++++++++++++++++++++------ drivers/net/ethernet/mellanox/mlx4/qp.c | 38 ++++++++++++++++-- 2 files changed, 92 insertions(+), 17 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index b78803937897..bf159c0fe82b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1365,6 +1365,19 @@ out: return err; } +/* for IB-type ports only in SRIOV mode. Checks that both proxy QP0 + * and real QP0 are active, so that the paravirtualized QP0 is ready + * to operate */ +static int check_qp0_state(struct mlx4_dev *dev, int function, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + /* irrelevant if not infiniband */ + if (priv->mfunc.master.qp0_state[port].proxy_qp0_active && + priv->mfunc.master.qp0_state[port].qp0_active) + return 1; + return 0; +} + int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1381,14 +1394,29 @@ int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) return -ENODEV; - /* Enable port only if it was previously disabled */ - if (!priv->mfunc.master.init_port_ref[port]) { - err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); - if (err) - return err; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { + /* Enable port only if it was previously disabled */ + if (!priv->mfunc.master.init_port_ref[port]) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; + } + priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); + } else { + if (slave == mlx4_master_func_num(dev)) { + if (check_qp0_state(dev, slave, port) && + !priv->mfunc.master.qp0_state[port].port_active) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; + priv->mfunc.master.qp0_state[port].port_active = 1; + priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); + } + } else + priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } - priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); ++priv->mfunc.master.init_port_ref[port]; return 0; } @@ -1463,13 +1491,30 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) return -ENODEV; - if (priv->mfunc.master.init_port_ref[port] == 1) { - err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, - MLX4_CMD_NATIVE); - if (err) - return err; + + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { + if (priv->mfunc.master.init_port_ref[port] == 1) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, + 1000, MLX4_CMD_NATIVE); + if (err) + return err; + } + priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); + } else { + /* infiniband port */ + if (slave == mlx4_master_func_num(dev)) { + if (!priv->mfunc.master.qp0_state[port].qp0_active && + priv->mfunc.master.qp0_state[port].port_active) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, + 1000, MLX4_CMD_NATIVE); + if (err) + return err; + priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); + priv->mfunc.master.qp0_state[port].port_active = 0; + } + } else + priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); } - priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); --priv->mfunc.master.init_port_ref[port]; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index b8da72b29cf7..436ef6c69add 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -67,10 +67,18 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) complete(&qp->free); } -static int is_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp) +/* used for INIT/CLOSE port logic */ +static int is_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0) { - return qp->qpn >= dev->caps.sqp_start && + /* qp0 is either the proxy qp0, or the real qp0 */ + *proxy_qp0 = qp->qpn >= dev->caps.sqp_start && qp->qpn <= dev->caps.sqp_start + 1; + + *real_qp0 = mlx4_is_master(dev) && + qp->qpn >= dev->caps.base_sqpn && + qp->qpn <= dev->caps.base_sqpn + 1; + + return *real_qp0 || *proxy_qp0; } static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, @@ -122,6 +130,8 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; int ret = 0; + int real_qp0 = 0; + int proxy_qp0 = 0; u8 port; if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE || @@ -133,9 +143,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native); if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR && cur_state != MLX4_QP_STATE_RST && - is_qp0(dev, qp)) { + is_qp0(dev, qp, &real_qp0, &proxy_qp0)) { port = (qp->qpn & 1) + 1; - priv->mfunc.master.qp0_state[port].qp0_active = 0; + if (proxy_qp0) + priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; + else + priv->mfunc.master.qp0_state[port].qp0_active = 0; } return ret; } @@ -162,6 +175,23 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, new_state == MLX4_QP_STATE_RST ? 2 : 0, op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native); + if (mlx4_is_master(dev) && is_qp0(dev, qp, &real_qp0, &proxy_qp0)) { + port = (qp->qpn & 1) + 1; + if (cur_state != MLX4_QP_STATE_ERR && + cur_state != MLX4_QP_STATE_RST && + new_state == MLX4_QP_STATE_ERR) { + if (proxy_qp0) + priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; + else + priv->mfunc.master.qp0_state[port].qp0_active = 0; + } else if (new_state == MLX4_QP_STATE_RTR) { + if (proxy_qp0) + priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1; + else + priv->mfunc.master.qp0_state[port].qp0_active = 1; + } + } + mlx4_free_cmd_mailbox(dev, mailbox); return ret; } -- cgit v1.2.3 From 992e8e6e8781b71fd475bd1fd0555da7dba59966 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:54 +0000 Subject: IB/mlx4: Miscellaneous adjustments for SR-IOV IB support 1. Allow only master to change node description. 2. Prevent AH leakage in send mads. 3. Take device part number from PCI structure, so that guests see the VF part number (and not the PF part number). 4. Place the device revision ID into caps structure at startup. 5. SET_PORT in update_gids_task needs to go through wrapper on master. 6. In mlx4_ib_event(), PORT_MGMT_EVENT needs be handled in a work queue on the master, since it propagates events to slaves using GEN_EQE. 7. Do not support FMR on slaves. 8. Add spinlock to slave_event(), since it is called both in interrupt context and in process context (due to 6 above, and also if smp_snoop is used). This fix was found and implemented by Saeed Mahameed Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 6 +++++- drivers/infiniband/hw/mlx4/main.c | 26 ++++++++++++++++++-------- drivers/net/ethernet/mellanox/mlx4/cmd.c | 1 + drivers/net/ethernet/mellanox/mlx4/eq.c | 8 ++++++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + 5 files changed, 31 insertions(+), 11 deletions(-) (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index b689dbd6d8f6..b91b4865d635 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -709,7 +709,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, if (!out_mad->mad_hdr.status) { if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) smp_snoop(ibdev, port_num, in_mad, prev_lid); - node_desc_override(ibdev, out_mad); + /* slaves get node desc from FW */ + if (!mlx4_is_slave(to_mdev(ibdev)->dev)) + node_desc_override(ibdev, out_mad); } /* set return bit in status of directed route responses */ @@ -792,6 +794,8 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { + if (mad_send_wc->send_buf->context[0]) + ib_destroy_ah(mad_send_wc->send_buf->context[0]); ib_free_send_mad(mad_send_wc->send_buf); } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 7d97578fbbaa..46303b209ce6 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -138,7 +138,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); + props->vendor_part_id = dev->dev->pdev->device; props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); @@ -478,6 +478,9 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) return 0; + if (mlx4_is_slave(to_mdev(ibdev)->dev)) + return -EOPNOTSUPP; + spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); memcpy(ibdev->node_desc, props->node_desc, 64); spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); @@ -493,7 +496,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, memset(mailbox->buf, 0, 256); memcpy(mailbox->buf, props->node_desc, 64); mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, - MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); @@ -921,6 +924,7 @@ static int init_node_data(struct mlx4_ib_dev *dev) if (err) goto out; + dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); out: @@ -1009,7 +1013,7 @@ static void update_gids_task(struct work_struct *work) err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_NATIVE); + MLX4_CMD_WRAPPED); if (err) pr_warn("set port command failed\n"); else { @@ -1400,10 +1404,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; - ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; - ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; - ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; - ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + if (!mlx4_is_slave(ibdev->dev)) { + ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; + ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; + ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; + ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; @@ -1615,7 +1621,11 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, INIT_WORK(&ew->work, handle_port_mgmt_change_event); memcpy(&ew->ib_eqe, eqe, sizeof *eqe); ew->ib_dev = ibdev; - handle_port_mgmt_change_event(&ew->work); + /* need to queue only for port owner, which uses GEN_EQE */ + if (mlx4_is_master(dev)) + queue_work(wq, &ew->work); + else + handle_port_mgmt_change_event(&ew->work); return; case MLX4_DEV_EVENT_SLAVE_INIT: diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index ea5c884ab899..78fdbd653fa9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1757,6 +1757,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) INIT_WORK(&priv->mfunc.master.slave_flr_event_work, mlx4_master_handle_slave_flr); spin_lock_init(&priv->mfunc.master.slave_state_lock); + spin_lock_init(&priv->mfunc.master.slave_eq.event_lock); priv->mfunc.master.comm_wq = create_singlethread_workqueue("mlx4_comm"); if (!priv->mfunc.master.comm_wq) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index c425826b1fc0..51c764901ad2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -164,13 +164,16 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq; - struct mlx4_eqe *s_eqe = - &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)]; + struct mlx4_eqe *s_eqe; + unsigned long flags; + spin_lock_irqsave(&slave_eq->event_lock, flags); + s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)]; if ((!!(s_eqe->owner & 0x80)) ^ (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) { mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. " "No free EQE on slave events queue\n", slave); + spin_unlock_irqrestore(&slave_eq->event_lock, flags); return; } @@ -183,6 +186,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.slave_event_work); + spin_unlock_irqrestore(&slave_eq->event_lock, flags); } static void mlx4_slave_event(struct mlx4_dev *dev, int slave, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 23f74759f403..399793a23457 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -473,6 +473,7 @@ struct mlx4_slave_event_eq { u32 eqn; u32 cons; u32 prod; + spinlock_t event_lock; struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE]; }; -- cgit v1.2.3 From 026149cbaada391d98f1cbec47c488cb548f753a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:55 +0000 Subject: mlx4: Activate SR-IOV mode for IB Remove the error returns for IB ports from mlx4_ib_add, mlx4_INIT_PORT_wrapper, and mlx4_CLOSE_PORT_wrapper. Currently, SRIOV is supported only for devices for which the link layer is IB on all ports; RoCE support will be added later. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 8 ++++++-- drivers/net/ethernet/mellanox/mlx4/fw.c | 6 ------ include/linux/mlx4/device.h | 4 ++++ 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 46303b209ce6..e849347ef99a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1293,11 +1293,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) pr_info_once("%s", mlx4_ib_version); - if (mlx4_is_mfunc(dev)) { - pr_warn("IB not yet supported in SRIOV\n"); + mlx4_foreach_non_ib_transport_port(i, dev) + num_ports++; + + if (mlx4_is_mfunc(dev) && num_ports) { + dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n"); return NULL; } + num_ports = 0; mlx4_foreach_ib_transport_port(i, dev) num_ports++; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index bf159c0fe82b..1d74e8541146 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1391,9 +1391,6 @@ int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) return 0; - if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) - return -ENODEV; - if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { /* Enable port only if it was previously disabled */ if (!priv->mfunc.master.init_port_ref[port]) { @@ -1489,9 +1486,6 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, (1 << port))) return 0; - if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) - return -ENODEV; - if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { if (priv->mfunc.master.init_port_ref[port] == 1) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b3f9b4500b9b..9b243df789ef 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -697,6 +697,10 @@ struct mlx4_init_port_param { for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if ((type) == (dev)->caps.port_mask[(port)]) +#define mlx4_foreach_non_ib_transport_port(port, dev) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB)) + #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ -- cgit v1.2.3 From afa8fd1db9f295a0c4130bc6d87bf8b05bdd0523 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:56 +0000 Subject: mlx4: Paravirtualize Node Guids for slaves This is necessary in order to support > 1 VF/PF in a VM for software that uses the node guid as a discriminator, such as librdmacm. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 14 ++++++++++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 +++ drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 22 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + include/linux/mlx4/device.h | 2 ++ 6 files changed, 53 insertions(+) (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index b91b4865d635..603a114b3dfe 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -89,6 +90,12 @@ static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num); static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, int block, u32 change_bitmap); +__be64 mlx4_ib_gen_node_guid(void) +{ +#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40)) + return cpu_to_be64(NODE_GUID_HI | random32()); +} + __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) { return cpu_to_be64(atomic_inc_return(&ctx->tid)) | @@ -1962,6 +1969,13 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) return 0; } + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + if (i == mlx4_master_func_num(dev->dev)) + mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid); + else + mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid()); + } + err = mlx4_ib_init_alias_guid_service(dev); if (err) { mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n"); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e57a220a4d55..e04cbc9a54a5 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -723,4 +723,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ; void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device); +__be64 mlx4_ib_gen_node_guid(void); + + #endif /* MLX4_IB_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 78fdbd653fa9..90774b7e47a4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -696,6 +696,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, struct ib_smp *outsmp = outbox->buf; __be16 *outtab = (__be16 *)(outsmp->data); __be32 slave_cap_mask; + __be64 slave_node_guid; port = vhcr->in_modifier; if (smp->base_version == 1 && @@ -755,6 +756,16 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, } return err; } + if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, + vhcr->in_modifier, vhcr->op_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + if (!err) { + slave_node_guid = mlx4_get_slave_node_guid(dev, slave); + memcpy(outsmp->data + 12, &slave_node_guid, 8); + } + return err; + } } } if (slave != mlx4_master_func_num(dev) && diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2294b7173180..76f69fdd01d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -435,6 +435,28 @@ void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int } EXPORT_SYMBOL(mlx4_sync_pkey_table); +void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return; + + priv->slave_node_guids[slave] = guid; +} +EXPORT_SYMBOL(mlx4_put_slave_node_guid); + +__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return 0; + + return priv->slave_node_guids[slave]; +} +EXPORT_SYMBOL(mlx4_get_slave_node_guid); + int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 399793a23457..4b1fbd84a68f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -810,6 +810,7 @@ struct mlx4_priv { int reserved_mtts; int fs_hash_mode; u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + __be64 slave_node_guids[MLX4_MFUNC_MAX]; }; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9b243df789ef..c0f8f74a0a5e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -981,5 +981,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port); int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event); +void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); +__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 47605df953985c2b792ac9f3ddf70d270b89adb8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:57 +0000 Subject: mlx4: Modify proxy/tunnel QP mechanism so that guests do no calculations Previously, the structure of a guest's proxy QPs followed the structure of the PPF special qps (qp0 port 1, qp0 port 2, qp1 port 1, qp1 port 2, ...). The guest then did offset calculations on the sqp_base qp number that the PPF passed to it in QUERY_FUNC_CAP(). This is now changed so that the guest does no offset calculations regarding proxy or tunnel QPs to use. This change frees the PPF from needing to adhere to a specific order in allocating proxy and tunnel QPs. Now QUERY_FUNC_CAP provides each port individually with its proxy qp0, proxy qp1, tunnel qp0, and tunnel qp1 QP numbers, and these are used directly where required (with no offset calculations). To accomplish this change, several fields were added to the phys_caps structure for use by the PPF and by non-SR-IOV mode: base_sqpn -- in non-sriov mode, this was formerly sqp_start. base_proxy_sqpn -- the first physical proxy qp number -- used by PPF base_tunnel_sqpn -- the first physical tunnel qp number -- used by PPF. The current code in the PPF still adheres to the previous layout of sqps, proxy-sqps and tunnel-sqps. However, the PPF can change this layout without affecting VF or (paravirtualized) PF code. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 12 +-- drivers/infiniband/hw/mlx4/qp.c | 104 ++++++++++++------ drivers/net/ethernet/mellanox/mlx4/fw.c | 173 +++++++++++++++++------------- drivers/net/ethernet/mellanox/mlx4/fw.h | 14 +-- drivers/net/ethernet/mellanox/mlx4/main.c | 61 ++++++++--- drivers/net/ethernet/mellanox/mlx4/qp.c | 71 ++++++++---- include/linux/mlx4/device.h | 16 +-- 7 files changed, 290 insertions(+), 161 deletions(-) (limited to 'drivers/net') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 603a114b3dfe..658a622791fb 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -505,7 +505,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, } else tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; - dqpn = dev->dev->caps.sqp_start + 8 * slave + port + (dest_qpt * 2) - 1; + dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1; /* get tunnel tx data buf for slave */ src_qp = tun_qp->qp; @@ -1074,9 +1074,9 @@ static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port, static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) { - int slave_start = dev->dev->caps.sqp_start + 8 * slave; + int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; - return (qpn >= slave_start && qpn <= slave_start + 1); + return (qpn >= proxy_start && qpn <= proxy_start + 1); } @@ -1191,14 +1191,14 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc int slave; /* Get slave that sent this packet */ - if (wc->src_qp < dev->dev->caps.sqp_start || - wc->src_qp >= dev->dev->caps.base_tunnel_sqpn || + if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || + wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX || (wc->src_qp & 0x1) != ctx->port - 1 || wc->src_qp & 0x4) { mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp); return; } - slave = ((wc->src_qp & ~0x7) - dev->dev->caps.sqp_start) / 8; + slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8; if (slave != ctx->slave) { mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " "belongs to another slave\n", wc->src_qp); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index a8622510de42..96fe103f5973 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -116,33 +116,57 @@ static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) if (!mlx4_is_master(dev->dev)) return 0; - return qp->mqp.qpn >= dev->dev->caps.base_sqpn && - qp->mqp.qpn < dev->dev->caps.base_sqpn + - 8 + 16 * MLX4_MFUNC_MAX; + return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn && + qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn + + 8 * MLX4_MFUNC_MAX; } static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { - return ((mlx4_is_master(dev->dev) && - qp->mqp.qpn >= dev->dev->caps.base_sqpn && - qp->mqp.qpn <= dev->dev->caps.base_sqpn + 3) || - (qp->mqp.qpn >= dev->dev->caps.sqp_start && - qp->mqp.qpn <= dev->dev->caps.sqp_start + 3)); + int proxy_sqp = 0; + int real_sqp = 0; + int i; + /* PPF or Native -- real SQP */ + real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && + qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && + qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3); + if (real_sqp) + return 1; + /* VF or PF -- proxy SQP */ + if (mlx4_is_mfunc(dev->dev)) { + for (i = 0; i < dev->dev->caps.num_ports; i++) { + if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] || + qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) { + proxy_sqp = 1; + break; + } + } + } + return proxy_sqp; } /* used for INIT/CLOSE port logic */ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { - int qp0; - - /* qp0 is either the proxy qp0, or the real qp0 */ - qp0 = (qp->mqp.qpn >= dev->dev->caps.sqp_start && - qp->mqp.qpn <= dev->dev->caps.sqp_start + 1) || - (mlx4_is_master(dev->dev) && - qp->mqp.qpn >= dev->dev->caps.base_sqpn && - qp->mqp.qpn <= dev->dev->caps.base_sqpn + 1); - - return qp0; + int proxy_qp0 = 0; + int real_qp0 = 0; + int i; + /* PPF or Native -- real QP0 */ + real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && + qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && + qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1); + if (real_qp0) + return 1; + /* VF or PF -- proxy QP0 */ + if (mlx4_is_mfunc(dev->dev)) { + for (i = 0; i < dev->dev->caps.num_ports; i++) { + if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) { + proxy_qp0 = 1; + break; + } + } + } + return proxy_qp0; } static void *get_wqe(struct mlx4_ib_qp *qp, int offset) @@ -607,8 +631,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; else qp_type = MLX4_IB_QPT_TUN_SMI; - qpn = dev->dev->caps.base_tunnel_sqpn + 8 * tnl_init->slave + - tnl_init->proxy_qp_type * 2 + tnl_init->port - 1; + /* we are definitely in the PPF here, since we are creating + * tunnel QPs. base_tunnel_sqpn is therefore valid. */ + qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave + + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1; sqpn = qpn; } @@ -630,12 +656,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->mlx4_ib_qp_type = qp_type; - if (mlx4_is_mfunc(dev->dev) && - (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI)) { - qpn -= 8; - sqpn -= 8; - } - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); @@ -935,6 +955,23 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, del_gid_entries(qp); } +static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) +{ + /* Native or PPF */ + if (!mlx4_is_mfunc(dev->dev) || + (mlx4_is_master(dev->dev) && + attr->create_flags & MLX4_IB_SRIOV_SQP)) { + return dev->dev->phys_caps.base_sqpn + + (attr->qp_type == IB_QPT_SMI ? 0 : 2) + + attr->port_num - 1; + } + /* PF or VF -- creating proxies */ + if (attr->qp_type == IB_QPT_SMI) + return dev->dev->caps.qp0_proxy[attr->port_num - 1]; + else + return dev->dev->caps.qp1_proxy[attr->port_num - 1]; +} + struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) @@ -998,9 +1035,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, - to_mdev(pd->device)->dev->caps.sqp_start + - (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + - init_attr->port_num - 1, + get_sqp_num(to_mdev(pd->device), init_attr), &qp); if (err) return ERR_PTR(err); @@ -1643,8 +1678,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); else sqp->ud_header.bth.destination_qpn = - cpu_to_be32(mdev->dev->caps.base_tunnel_sqpn + - sqp->qp.port - 1); + cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) @@ -2012,10 +2046,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, cpu_to_be32(0xf0000000); memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); - dseg->dqpn = cpu_to_be32(dev->dev->caps.base_tunnel_sqpn + - qpt * 2 + port - 1); - /* use well-known qkey from the QPC */ - dseg->qkey = cpu_to_be32(0x80000000); + /* This function used only for sending on QP1 proxies */ + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + /* Use QKEY from the QP context, which is set by master */ + dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 1d74e8541146..e2ed36e2fb29 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -184,8 +184,6 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0X30 -#define QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET 0x44 -#define QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET 0x48 #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 @@ -196,21 +194,39 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8 #define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc +#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10 +#define QUERY_FUNC_CAP_QP0_PROXY 0x14 +#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18 +#define QUERY_FUNC_CAP_QP1_PROXY 0x1c + #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80 #define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80 if (vhcr->op_modifier == 1) { - field = vhcr->in_modifier; - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - field = 0; /* ensure force vlan and force mac bits are not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); /* ensure that phy_wqe_gid bit is not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + field = vhcr->in_modifier; /* phys-port = logical-port */ + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + + /* size is now the QP number */ + size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); + + size += 2; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); + + size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY); + + size += 2; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); + } else if (vhcr->op_modifier == 0) { /* enable rdma and ethernet interfaces */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA); @@ -249,117 +265,124 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.num_mgms + dev->caps.num_amgms; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); - size = dev->caps.base_tunnel_sqpn + 8 * slave; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET); - - size = dev->caps.sqp_start + 8 * slave; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET); - } else err = -EINVAL; return err; } -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap) +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, + struct mlx4_func_cap *func_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; - u8 field; + u8 field, op_modifier; u32 size; - int i; int err = 0; + op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FUNC_CAP, + err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier, + MLX4_CMD_QUERY_FUNC_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) goto out; outbox = mailbox->buf; - MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); - if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { - mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); - err = -EPROTONOSUPPORT; - goto out; - } - func_cap->flags = field; - - MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); - func_cap->num_ports = field; + if (!op_modifier) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); + if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { + mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); + err = -EPROTONOSUPPORT; + goto out; + } + func_cap->flags = field; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); - func_cap->pf_context_behaviour = size; + MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); + func_cap->num_ports = field; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); - func_cap->qp_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); + func_cap->pf_context_behaviour = size; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); - func_cap->srq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); + func_cap->qp_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); - func_cap->cq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); + func_cap->srq_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); - func_cap->max_eq = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); + func_cap->cq_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); - func_cap->reserved_eq = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + func_cap->max_eq = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); - func_cap->mpt_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + func_cap->reserved_eq = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); - func_cap->mtt_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); + func_cap->mpt_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); - func_cap->mcg_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); + func_cap->mtt_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET); - func_cap->base_tunnel_qpn = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + func_cap->mcg_quota = size & 0xFFFFFF; + goto out; + } - MLX4_GET(size, outbox, QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET); - func_cap->base_proxy_qpn = size & 0xFFFFFF; + /* logical port query */ + if (gen_or_port > dev->caps.num_ports) { + err = -EINVAL; + goto out; + } - for (i = 1; i <= func_cap->num_ports; ++i) { - err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 1, - MLX4_CMD_QUERY_FUNC_CAP, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); - if (err) + if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); + if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { + mlx4_err(dev, "VLAN is enforced on this port\n"); + err = -EPROTONOSUPPORT; goto out; + } - if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); - if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { - mlx4_err(dev, "VLAN is enforced on this port\n"); - err = -EPROTONOSUPPORT; - goto out; - } - - if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { - mlx4_err(dev, "Force mac is enabled on this port\n"); - err = -EPROTONOSUPPORT; - goto out; - } - } else if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); - if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { - mlx4_err(dev, "phy_wqe_gid is " - "enforced on this ib port\n"); - err = -EPROTONOSUPPORT; - goto out; - } + if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { + mlx4_err(dev, "Force mac is enabled on this port\n"); + err = -EPROTONOSUPPORT; + goto out; + } + } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { + mlx4_err(dev, "phy_wqe_gid is " + "enforced on this ib port\n"); + err = -EPROTONOSUPPORT; + goto out; } + } - MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - func_cap->physical_port[i] = field; + MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + func_cap->physical_port = field; + if (func_cap->physical_port != gen_or_port) { + err = -ENOSYS; + goto out; } + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); + func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); + func_cap->qp0_proxy_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); + func_cap->qp1_tunnel_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); + func_cap->qp1_proxy_qpn = size & 0xFFFFFF; + /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: * - num remains the maximum resource index diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index ced1de57c818..85abe9c11a22 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -134,11 +134,12 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; - u32 base_qpn; - u32 base_tunnel_qpn; - u32 base_proxy_qpn; - u8 physical_port[MLX4_MAX_PORTS + 1]; - u8 port_flags[MLX4_MAX_PORTS + 1]; + u32 qp0_tunnel_qpn; + u32 qp0_proxy_qpn; + u32 qp1_tunnel_qpn; + u32 qp1_proxy_qpn; + u8 physical_port; + u8 port_flags; }; struct mlx4_adapter { @@ -195,7 +196,8 @@ struct mlx4_set_ib_param { }; int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap); -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap); +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, + struct mlx4_func_cap *func_cap); int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 76f69fdd01d5..1e2ab9d00f1c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -410,15 +410,16 @@ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) { u32 qk = MLX4_RESERVED_QKEY_BASE; - if (qpn >= dev->caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || - qpn < dev->caps.sqp_start) + + if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || + qpn < dev->phys_caps.base_proxy_sqpn) return -EINVAL; - if (qpn >= dev->caps.base_tunnel_sqpn) + if (qpn >= dev->phys_caps.base_tunnel_sqpn) /* tunnel qp */ - qk += qpn - dev->caps.base_tunnel_sqpn; + qk += qpn - dev->phys_caps.base_tunnel_sqpn; else - qk += qpn - dev->caps.sqp_start; + qk += qpn - dev->phys_caps.base_proxy_sqpn; *qkey = qk; return 0; } @@ -527,9 +528,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) } memset(&func_cap, 0, sizeof(func_cap)); - err = mlx4_QUERY_FUNC_CAP(dev, &func_cap); + err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); if (err) { - mlx4_err(dev, "QUERY_FUNC_CAP command failed, aborting.\n"); + mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", + err); return err; } @@ -557,12 +559,33 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } + dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + + if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || + !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + err = -ENOMEM; + goto err_mem; + } + for (i = 1; i <= dev->caps.num_ports; ++i) { + err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); + if (err) { + mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" + " port %d, aborting (%d).\n", i, err); + goto err_mem; + } + dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; + dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; + dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; + dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; dev->caps.port_mask[i] = dev->caps.port_type[i]; if (mlx4_get_slave_pkey_gid_tbl_len(dev, i, &dev->caps.gid_table_len[i], &dev->caps.pkey_table_len[i])) - return -ENODEV; + goto err_mem; } if (dev->caps.uar_page_size * (dev->caps.num_uars - @@ -572,14 +595,20 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) "PCI resource 2 size of 0x%llx, aborting.\n", dev->caps.uar_page_size * dev->caps.num_uars, (unsigned long long) pci_resource_len(dev->pdev, 2)); - return -ENODEV; + goto err_mem; } - /* Calculate our sqp_start */ - dev->caps.sqp_start = func_cap.base_proxy_qpn; - dev->caps.base_tunnel_sqpn = func_cap.base_tunnel_qpn; - return 0; + +err_mem: + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + dev->caps.qp0_tunnel = dev->caps.qp0_proxy = + dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + + return err; } /* @@ -2261,6 +2290,12 @@ static void mlx4_remove_one(struct pci_dev *pdev) if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); + + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + kfree(priv); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 436ef6c69add..81e2abe07bbb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -68,15 +68,15 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) } /* used for INIT/CLOSE port logic */ -static int is_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0) +static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0) { + /* this procedure is called after we already know we are on the master */ /* qp0 is either the proxy qp0, or the real qp0 */ - *proxy_qp0 = qp->qpn >= dev->caps.sqp_start && - qp->qpn <= dev->caps.sqp_start + 1; + u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev); + *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1; - *real_qp0 = mlx4_is_master(dev) && - qp->qpn >= dev->caps.base_sqpn && - qp->qpn <= dev->caps.base_sqpn + 1; + *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn && + qp->qpn <= dev->phys_caps.base_sqpn + 1; return *real_qp0 || *proxy_qp0; } @@ -143,7 +143,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native); if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR && cur_state != MLX4_QP_STATE_RST && - is_qp0(dev, qp, &real_qp0, &proxy_qp0)) { + is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { port = (qp->qpn & 1) + 1; if (proxy_qp0) priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; @@ -175,7 +175,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, new_state == MLX4_QP_STATE_RST ? 2 : 0, op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native); - if (mlx4_is_master(dev) && is_qp0(dev, qp, &real_qp0, &proxy_qp0)) { + if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { port = (qp->qpn & 1) + 1; if (cur_state != MLX4_QP_STATE_ERR && cur_state != MLX4_QP_STATE_RST && @@ -422,6 +422,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; int err; int reserved_from_top = 0; + int k; spin_lock_init(&qp_table->lock); INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); @@ -436,7 +437,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * We also reserve the MSB of the 24-bit QP number to indicate * that a QP is an XRC QP. */ - dev->caps.base_sqpn = + dev->phys_caps.base_sqpn = ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); { @@ -479,24 +480,54 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) */ err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 23) - 1, dev->caps.base_sqpn + 8 + + (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev), reserved_from_top); + if (err) + return err; - /* In mfunc, sqp_start is the base of the proxy SQPs, since the PF also - * uses paravirtualized SQPs. - * In native mode, sqp_start is the base of the real SQPs. */ if (mlx4_is_mfunc(dev)) { - dev->caps.sqp_start = dev->caps.base_sqpn + - 8 * (mlx4_master_func_num(dev) + 1); - dev->caps.base_tunnel_sqpn = dev->caps.sqp_start + 8 * MLX4_MFUNC_MAX; - } else - dev->caps.sqp_start = dev->caps.base_sqpn; + /* for PPF use */ + dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8; + dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX; + + /* In mfunc, calculate proxy and tunnel qp offsets for the PF here, + * since the PF does not call mlx4_slave_caps */ + dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + + if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || + !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + err = -ENOMEM; + goto err_mem; + } + for (k = 0; k < dev->caps.num_ports; k++) { + dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn + + 8 * mlx4_master_func_num(dev) + k; + dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX; + dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn + + 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k; + dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX; + } + } + + + err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn); if (err) - return err; + goto err_mem; + return 0; - return mlx4_CONF_SPECIAL_QP(dev, dev->caps.base_sqpn); +err_mem: + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + dev->caps.qp0_tunnel = dev->caps.qp0_proxy = + dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + return err; } void mlx4_cleanup_qp_table(struct mlx4_dev *dev) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c0f8f74a0a5e..6d1acb04cd17 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -328,6 +328,9 @@ struct mlx4_phys_caps { u32 gid_phys_table_len[MLX4_MAX_PORTS + 1]; u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1]; u32 num_phys_eqs; + u32 base_sqpn; + u32 base_proxy_sqpn; + u32 base_tunnel_sqpn; }; struct mlx4_caps { @@ -358,9 +361,10 @@ struct mlx4_caps { int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; - int sqp_start; - u32 base_sqpn; - u32 base_tunnel_sqpn; + u32 *qp0_proxy; + u32 *qp1_proxy; + u32 *qp0_tunnel; + u32 *qp1_tunnel; int num_srqs; int max_srq_wqes; int max_srq_sge; @@ -722,15 +726,15 @@ static inline int mlx4_is_master(struct mlx4_dev *dev) static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { - return (qpn < dev->caps.base_sqpn + 8 + + return (qpn < dev->phys_caps.base_sqpn + 8 + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); } static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) { - int base = dev->caps.sqp_start + slave * 8; + int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8; - if (qpn >= base && qpn < base + 8) + if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8) return 1; return 0; -- cgit v1.2.3 From 69612b9ff44ddc305b727a1f78de2248f9d70bf0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 23 Sep 2012 09:18:24 -0700 Subject: mlx4_core: Trivial readability fix: "0X30" -> "0x30" Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index e2ed36e2fb29..419914bf4834 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -183,7 +183,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x24 #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c -#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0X30 +#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30 #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 -- cgit v1.2.3 From 84b1f1538fadd066cc9877c2276f3fefd8a79bc4 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 25 Sep 2012 17:09:42 -0700 Subject: mlx4_core: Trivial cleanups to driver log messages Also put format string onto one line. Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/main.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 1e2ab9d00f1c..296288c7cb37 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2036,12 +2036,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) } if (num_vfs) { - mlx4_warn(dev, "Enabling sriov with:%d vfs\n", num_vfs); + mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs); err = pci_enable_sriov(pdev, num_vfs); if (err) { - mlx4_err(dev, "Failed to enable sriov," - "continuing without sriov enabled" - " (err = %d).\n", err); + mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", + err); err = 0; } else { mlx4_warn(dev, "Running in master mode\n"); @@ -2284,7 +2283,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); if (dev->flags & MLX4_FLAG_SRIOV) { - mlx4_warn(dev, "Disabling sriov\n"); + mlx4_warn(dev, "Disabling SR-IOV\n"); pci_disable_sriov(pdev); } -- cgit v1.2.3 From f3d4c89ee4a8c993cc334a67f84a3fb724a1dd35 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 25 Sep 2012 21:24:07 -0700 Subject: mlx4_core: Fix crash on uninitialized priv->cmd.slave_sem On an SR-IOV master device, __mlx4_init_one() calls mlx4_init_hca() before mlx4_multi_func_init(). However, for unlucky configurations, mlx4_init_hca() might call mlx4_SENSE_PORT() (via mlx4_dev_cap()), and that calls mlx4_cmd_imm() with MLX4_CMD_WRAPPED set. However, on a multifunction device with MLX4_CMD_WRAPPED, __mlx4_cmd() calls into mlx4_slave_cmd(), and that immediately tries to do down(&priv->cmd.slave_sem); but priv->cmd.slave_sem isn't initialized until mlx4_multi_func_init() (which we haven't called yet). The next thing it tries to do is access priv->mfunc.vhcr, but that hasn't been allocated yet. Fix this by moving the initialization of slave_sem and vhcr up into mlx4_cmd_init(). Also, since slave_sem is really just being used as a mutex, convert it into a slave_cmd_mutex. Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 51 +++++++++++++++++++------------ drivers/net/ethernet/mellanox/mlx4/main.c | 11 ++++--- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +- 3 files changed, 38 insertions(+), 26 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 90774b7e47a4..3d1899ff1076 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -395,7 +395,8 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr; int ret; - down(&priv->cmd.slave_sem); + mutex_lock(&priv->cmd.slave_cmd_mutex); + vhcr->in_param = cpu_to_be64(in_param); vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0; vhcr->in_modifier = cpu_to_be32(in_modifier); @@ -403,6 +404,7 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, vhcr->token = cpu_to_be16(CMD_POLL_TOKEN); vhcr->status = 0; vhcr->flags = !!(priv->cmd.use_events) << 6; + if (mlx4_is_master(dev)) { ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr); if (!ret) { @@ -439,7 +441,8 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, mlx4_err(dev, "failed execution of VHCR_POST command" "opcode 0x%x\n", op); } - up(&priv->cmd.slave_sem); + + mutex_unlock(&priv->cmd.slave_cmd_mutex); return ret; } @@ -1559,14 +1562,15 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) goto reset_slave; - down(&priv->cmd.slave_sem); + + mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { mlx4_err(dev, "Failed processing vhcr for slave:%d," " resetting slave.\n", slave); - up(&priv->cmd.slave_sem); + mutex_unlock(&priv->cmd.slave_cmd_mutex); goto reset_slave; } - up(&priv->cmd.slave_sem); + mutex_unlock(&priv->cmd.slave_cmd_mutex); break; default: mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave); @@ -1707,14 +1711,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) struct mlx4_slave_state *s_state; int i, j, err, port; - priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, - &priv->mfunc.vhcr_dma, - GFP_KERNEL); - if (!priv->mfunc.vhcr) { - mlx4_err(dev, "Couldn't allocate vhcr.\n"); - return -ENOMEM; - } - if (mlx4_is_master(dev)) priv->mfunc.comm = ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + @@ -1777,7 +1773,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_init_resource_tracker(dev)) goto err_thread; - sema_init(&priv->cmd.slave_sem, 1); err = mlx4_ARM_COMM_CHANNEL(dev); if (err) { mlx4_err(dev, " Failed to arm comm channel eq: %x\n", @@ -1791,8 +1786,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) mlx4_err(dev, "Couldn't sync toggles\n"); goto err_comm; } - - sema_init(&priv->cmd.slave_sem, 1); } return 0; @@ -1822,6 +1815,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); mutex_init(&priv->cmd.hcr_mutex); + mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; priv->cmd.toggle = 1; @@ -1838,14 +1832,30 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } } + if (mlx4_is_mfunc(dev)) { + priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, + &priv->mfunc.vhcr_dma, + GFP_KERNEL); + if (!priv->mfunc.vhcr) { + mlx4_err(dev, "Couldn't allocate VHCR.\n"); + goto err_hcr; + } + } + priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) - goto err_hcr; + goto err_vhcr; return 0; +err_vhcr: + if (mlx4_is_mfunc(dev)) + dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + priv->mfunc.vhcr, priv->mfunc.vhcr_dma); + priv->mfunc.vhcr = NULL; + err_hcr: if (!mlx4_is_slave(dev)) iounmap(priv->cmd.hcr); @@ -1868,9 +1878,6 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) } iounmap(priv->mfunc.comm); - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, priv->mfunc.vhcr_dma); - priv->mfunc.vhcr = NULL; } void mlx4_cmd_cleanup(struct mlx4_dev *dev) @@ -1881,6 +1888,10 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev) if (!mlx4_is_slave(dev)) iounmap(priv->cmd.hcr); + if (mlx4_is_mfunc(dev)) + dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + priv->mfunc.vhcr, priv->mfunc.vhcr_dma); + priv->mfunc.vhcr = NULL; } /* diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 296288c7cb37..1e9f816a94db 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1159,10 +1159,10 @@ static void mlx4_slave_exit(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - down(&priv->cmd.slave_sem); + mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) mlx4_warn(dev, "Failed to close slave function.\n"); - up(&priv->cmd.slave_sem); + mutex_unlock(&priv->cmd.slave_cmd_mutex); } static int map_bf_area(struct mlx4_dev *dev) @@ -1214,7 +1214,7 @@ static int mlx4_init_slave(struct mlx4_dev *dev) u32 slave_read; u32 cmd_channel_ver; - down(&priv->cmd.slave_sem); + mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, @@ -1263,12 +1263,13 @@ static int mlx4_init_slave(struct mlx4_dev *dev) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) goto err; - up(&priv->cmd.slave_sem); + + mutex_unlock(&priv->cmd.slave_cmd_mutex); return 0; err: mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); - up(&priv->cmd.slave_sem); + mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 4b1fbd84a68f..71eed05426ec 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -513,9 +513,9 @@ struct mlx4_cmd { struct pci_pool *pool; void __iomem *hcr; struct mutex hcr_mutex; + struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; - struct semaphore slave_sem; int max_cmds; spinlock_t context_lock; int free_head; -- cgit v1.2.3 From 839f12434c7618d326b9d1ece5eca643e5e48d0a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 27 Sep 2012 09:23:41 -0700 Subject: mlx4_core: Stash PCI ID driver_data in mlx4_priv structure That way we can check flags later on, when we've finished with the pci_device_id structure. Also convert the "is VF" flag to an enum: "Never do in the preprocessor what can be done in C." Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/main.c | 29 ++++++++++++++++------------- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 6 ++++++ 2 files changed, 22 insertions(+), 13 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 1e9f816a94db..c9732ca4191e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -95,8 +95,6 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " Not in use with device managed" " flow steering"); -#define MLX4_VF (1 << 0) - #define HCA_GLOBAL_CAP_MASK 0 #define PF_CONTEXT_BEHAVIOUR_MASK 0 @@ -1916,7 +1914,7 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) iounmap(owner); } -static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) { struct mlx4_priv *priv; struct mlx4_dev *dev; @@ -1939,12 +1937,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) /* * Check for BARs. */ - if (((id == NULL) || !(id->driver_data & MLX4_VF)) && + if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing DCS, aborting." - "(id == 0X%p, id->driver_data: 0x%lx," - " pci_resource_flags(pdev, 0):0x%lx)\n", id, - id ? id->driver_data : 0, pci_resource_flags(pdev, 0)); + "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n", + pci_dev_data, pci_resource_flags(pdev, 0)); err = -ENODEV; goto err_disable_pdev; } @@ -2009,7 +2006,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev->rev_id = pdev->revision; /* Detect if this device is a virtual function */ - if (id && id->driver_data & MLX4_VF) { + if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { /* When acting as pf, we normally skip vfs unless explicitly * requested to probe them. */ if (num_vfs && extended_func_num(pdev) > probe_vf) { @@ -2156,6 +2153,7 @@ slave_start: mlx4_sense_init(dev); mlx4_start_sense(dev); + priv->pci_dev_data = pci_dev_data; pci_set_drvdata(pdev, dev); return 0; @@ -2225,7 +2223,7 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev, { printk_once(KERN_INFO "%s", mlx4_version); - return __mlx4_init_one(pdev, id); + return __mlx4_init_one(pdev, id->driver_data); } static void mlx4_remove_one(struct pci_dev *pdev) @@ -2305,8 +2303,13 @@ static void mlx4_remove_one(struct pci_dev *pdev) int mlx4_restart_one(struct pci_dev *pdev) { + struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_priv *priv = mlx4_priv(dev); + int pci_dev_data; + + pci_dev_data = priv->pci_dev_data; mlx4_remove_one(pdev); - return __mlx4_init_one(pdev, NULL); + return __mlx4_init_one(pdev, pci_dev_data); } static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { @@ -2335,11 +2338,11 @@ static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { /* MT26478 ConnectX2 40GigE PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x676e), 0 }, /* MT25400 Family [ConnectX-2 Virtual Function] */ - { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_VF }, + { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF }, /* MT27500 Family [ConnectX-3] */ { PCI_VDEVICE(MELLANOX, 0x1003), 0 }, /* MT27500 Family [ConnectX-3 Virtual Function] */ - { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_VF }, + { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF }, { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */ { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */ { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */ @@ -2368,7 +2371,7 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { - int ret = __mlx4_init_one(pdev, NULL); + int ret = __mlx4_init_one(pdev, 0); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 71eed05426ec..ac58189ae6da 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -768,6 +768,10 @@ struct _rule_hw { }; }; +enum { + MLX4_PCI_DEV_IS_VF = 1 << 0, +}; + struct mlx4_priv { struct mlx4_dev dev; @@ -775,6 +779,8 @@ struct mlx4_priv { struct list_head ctx_list; spinlock_t ctx_lock; + int pci_dev_data; + struct list_head pgdir_list; struct mutex pgdir_mutex; -- cgit v1.2.3 From ca3e57a599e1f3624a6164a5c3a655859368f7aa Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 27 Sep 2012 09:53:05 -0700 Subject: mlx4_core: Clean up enabling of SENSE_PORT for older (ConnectX-1/-2) HCAs Instead of having a hard-coded "PCI device ID != 0x1003" (which obviously breaks as newer devices with ID != 0x1003 become available), instead let's set a flag in our PCI device table for the older devices where we're supposed to force using SENSE_PORT. This also avoids enabling SENSE_PORT for virtual functions by mistake. Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/main.c | 28 ++++++++++++++-------------- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index c9732ca4191e..877d1122f787 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -297,8 +297,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) mlx4_dbg(dev, "Steering mode is: %s\n", mlx4_steering_mode_str(dev->caps.steering_mode)); - /* Sense port always allowed on supported devices for ConnectX1 and 2 */ - if (dev->pdev->device != 0x1003) + /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ + if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; dev->caps.log_num_macs = log_num_mac; @@ -2314,29 +2314,29 @@ int mlx4_restart_one(struct pci_dev *pdev) static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { /* MT25408 "Hermon" SDR */ - { PCI_VDEVICE(MELLANOX, 0x6340), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR */ - { PCI_VDEVICE(MELLANOX, 0x634a), 0 }, + { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR */ - { PCI_VDEVICE(MELLANOX, 0x6354), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x6732), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x673c), 0 }, + { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x6368), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x6750), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x6372), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x675a), 0 }, + { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/ - { PCI_VDEVICE(MELLANOX, 0x6764), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ - { PCI_VDEVICE(MELLANOX, 0x6746), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26478 ConnectX2 40GigE PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x676e), 0 }, + { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25400 Family [ConnectX-2 Virtual Function] */ { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF }, /* MT27500 Family [ConnectX-3] */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index ac58189ae6da..1cf42036d7bb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -770,6 +770,7 @@ struct _rule_hw { enum { MLX4_PCI_DEV_IS_VF = 1 << 0, + MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1, }; struct mlx4_priv { -- cgit v1.2.3 From aadf4f3f66a7b710b05b31ac0839fbbf59e41f7c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 27 Sep 2012 10:01:19 -0700 Subject: mlx4_core: Disable SENSE_PORT for multifunction devices In the current driver, the SENSE_PORT firmware command is issued as a "wrapped" command, but the command handling code doesn't have a wrapper, so it will never do anything other than log an error message. The latest ConnectX-3 2.11.500 firmware reports the SENSE_PORT capability even in multi-function (SR-IOV) mode, so the driver will try to issue the command. At least until the driver has a proper wrapper for SENSE_PORT, make sure we disable the command for multi-function devices. Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 877d1122f787..bc1e5d41c292 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -300,6 +300,9 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; + /* Don't do sense port on multifunction devices (for now at least) */ + if (mlx4_is_mfunc(dev)) + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; dev->caps.log_num_macs = log_num_mac; dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; -- cgit v1.2.3