From dd37818dbdf8e51d0288c0197c351c005ffcdbdb Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 13 May 2008 11:27:25 -0700 Subject: RDMA/nes: Fix up nes_lro_max_aggr module parameter Fix some bugs with the max_aggr module parameter added with LRO support: - The module parameter value ignored and not actually used to set lro_mgr.max_aggr. - MODULE_PARM_DESC had a typo "_mro_" instead of "_lro_" so it didn't end up describing the actual module parameter. - The nes_lro_max_aggr variable was declared as unsigned, but the module_param line said "int" instead of "uint" for the type. - The default value for the parameter was stuck in the permissions field of module_param, which led to nonsensical permissions for the file under /sys/module/iw_nes/param. - The parameter was used in only one file but defined in another, which led to the variable being global for no good reason. Move everything related to the parameter to the file nes_hw.c where it is actually used. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.c | 4 ---- drivers/infiniband/hw/nes/nes.h | 1 - drivers/infiniband/hw/nes/nes_hw.c | 6 +++++- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 9f7364a9096d..a4e9269a29bd 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -91,10 +91,6 @@ unsigned int nes_debug_level = 0; module_param_named(debug_level, nes_debug_level, uint, 0644); MODULE_PARM_DESC(debug_level, "Enable debug output level"); -unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; -module_param(nes_lro_max_aggr, int, NES_LRO_MAX_AGGR); -MODULE_PARM_DESC(nes_mro_max_aggr, " nic LRO MAX packet aggregation"); - LIST_HEAD(nes_adapter_list); static LIST_HEAD(nes_dev_list); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 1f9f7bf73862..61b46e9c7d2d 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -173,7 +173,6 @@ extern int disable_mpa_crc; extern unsigned int send_first; extern unsigned int nes_drv_opt; extern unsigned int nes_debug_level; -extern unsigned int nes_lro_max_aggr; extern struct list_head nes_adapter_list; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 8dc70f9bad2f..d3278f111ca7 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -42,6 +42,10 @@ #include "nes.h" +static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; +module_param(nes_lro_max_aggr, uint, 0444); +MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation"); + static u32 crit_err_count; u32 int_mod_timer_init; u32 int_mod_cq_depth_256; @@ -1738,7 +1742,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) jumbomode = 1; nes_nic_init_timer_defaults(nesdev, jumbomode); } - nesvnic->lro_mgr.max_aggr = NES_LRO_MAX_AGGR; + nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr; nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; -- cgit v1.2.3 From 53dc1ca194c062aa9771e194047f27ec1ca592df Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 May 2008 11:40:25 -0700 Subject: IB/ipath: Fix RC and UC error handling When errors are detected in RC, the QP should transition to the IB_QPS_ERR state, not the IB_QPS_SQE state. Also, when the error is on the responder side, the receive work completion error was incorrect (remote vs. local). Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 54 ++-------- drivers/infiniband/hw/ipath/ipath_rc.c | 127 ++++++++--------------- drivers/infiniband/hw/ipath/ipath_ruc.c | 165 ++++++++++++++---------------- drivers/infiniband/hw/ipath/ipath_verbs.c | 4 +- drivers/infiniband/hw/ipath/ipath_verbs.h | 6 +- 5 files changed, 132 insertions(+), 224 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index dd5b6e9d57c2..6f98632877eb 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -374,13 +374,14 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) } /** - * ipath_error_qp - put a QP into an error state - * @qp: the QP to put into an error state + * ipath_error_qp - put a QP into the error state + * @qp: the QP to put into the error state * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. * Returns true if last WQE event should be generated. * The QP s_lock should be held and interrupts disabled. + * If we are already in error state, just return. */ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) @@ -389,8 +390,10 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) struct ib_wc wc; int ret = 0; - ipath_dbg("QP%d/%d in error state (%d)\n", - qp->ibqp.qp_num, qp->remote_qpn, err); + if (qp->state == IB_QPS_ERR) + goto bail; + + qp->state = IB_QPS_ERR; spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) @@ -460,6 +463,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) } else if (qp->ibqp.event_handler) ret = 1; +bail: return ret; } @@ -1025,48 +1029,6 @@ bail: return ret; } -/** - * ipath_sqerror_qp - put a QP's send queue into an error state - * @qp: QP who's send queue will be put into an error state - * @wc: the WC responsible for putting the QP in this state - * - * Flushes the send work queue. - * The QP s_lock should be held and interrupts disabled. - */ - -void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - - ipath_dbg("Send queue error on QP%d/%d: err: %d\n", - qp->ibqp.qp_num, qp->remote_qpn, wc->status); - - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - - ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - - wc->status = IB_WC_WR_FLUSH_ERR; - - while (qp->s_last != qp->s_head) { - wqe = get_swqe_ptr(qp, qp->s_last); - wc->wr_id = wqe->wr.wr_id; - wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - } - qp->s_cur = qp->s_tail = qp->s_head; - qp->state = IB_QPS_SQE; -} - /** * ipath_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 08b11b567614..b4b26c3aa613 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -771,27 +771,14 @@ done: * * The QP s_lock should be held and interrupts disabled. */ -void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) +void ipath_restart_rc(struct ipath_qp *qp, u32 psn) { struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); struct ipath_ibdev *dev; if (qp->s_retry == 0) { - wc->wr_id = wqe->wr.wr_id; - wc->status = IB_WC_RETRY_EXC_ERR; - wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc->vendor_err = 0; - wc->byte_len = 0; - wc->qp = &qp->ibqp; - wc->imm_data = 0; - wc->src_qp = qp->remote_qpn; - wc->wc_flags = 0; - wc->pkey_index = 0; - wc->slid = qp->remote_ah_attr.dlid; - wc->sl = qp->remote_ah_attr.sl; - wc->dlid_path_bits = 0; - wc->port_num = 0; - ipath_sqerror_qp(qp, wc); + ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); goto bail; } qp->s_retry--; @@ -804,6 +791,8 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); if (wqe->wr.opcode == IB_WR_RDMA_READ) @@ -845,6 +834,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; + enum ib_wc_status status; struct ipath_swqe *wqe; int ret = 0; u32 ack_psn; @@ -909,7 +899,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, */ update_last_psn(qp, wqe->psn - 1); /* Retry this request. */ - ipath_restart_rc(qp, wqe->psn, &wc); + ipath_restart_rc(qp, wqe->psn); /* * No need to process the ACK/NAK since we are * restarting an earlier request. @@ -937,20 +927,15 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { + memset(&wc, 0, sizeof wc); wc.wr_id = wqe->wr.wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; wc.byte_len = wqe->length; - wc.imm_data = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.wc_flags = 0; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); } qp->s_retry = qp->s_retry_cnt; @@ -1012,7 +997,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, if (qp->s_last == qp->s_tail) goto bail; if (qp->s_rnr_retry == 0) { - wc.status = IB_WC_RNR_RETRY_EXC_ERR; + status = IB_WC_RNR_RETRY_EXC_ERR; goto class_b; } if (qp->s_rnr_retry_cnt < 7) @@ -1050,37 +1035,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, * RDMA READ response which terminates the RDMA * READ. */ - ipath_restart_rc(qp, psn, &wc); + ipath_restart_rc(qp, psn); break; case 1: /* Invalid Request */ - wc.status = IB_WC_REM_INV_REQ_ERR; + status = IB_WC_REM_INV_REQ_ERR; dev->n_other_naks++; goto class_b; case 2: /* Remote Access Error */ - wc.status = IB_WC_REM_ACCESS_ERR; + status = IB_WC_REM_ACCESS_ERR; dev->n_other_naks++; goto class_b; case 3: /* Remote Operation Error */ - wc.status = IB_WC_REM_OP_ERR; + status = IB_WC_REM_OP_ERR; dev->n_other_naks++; class_b: - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.qp = &qp->ibqp; - wc.imm_data = 0; - wc.src_qp = qp->remote_qpn; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; - ipath_sqerror_qp(qp, &wc); + ipath_send_complete(qp, wqe, status); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: @@ -1126,8 +1099,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, int header_in_data) { struct ipath_swqe *wqe; + enum ib_wc_status status; unsigned long flags; - struct ib_wc wc; int diff; u32 pad; u32 aeth; @@ -1159,6 +1132,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, if (unlikely(qp->s_last == qp->s_tail)) goto ack_done; wqe = get_swqe_ptr(qp, qp->s_last); + status = IB_WC_SUCCESS; switch (opcode) { case OP(ACKNOWLEDGE): @@ -1200,7 +1174,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* no AETH, no ACK */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1); goto ack_done; } if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) @@ -1261,7 +1235,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* ACKs READ req. */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1); goto ack_done; } if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) @@ -1291,31 +1265,16 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, goto ack_done; } -ack_done: - spin_unlock_irqrestore(&qp->s_lock, flags); - goto bail; - ack_op_err: - wc.status = IB_WC_LOC_QP_OP_ERR; + status = IB_WC_LOC_QP_OP_ERR; goto ack_err; ack_len_err: - wc.status = IB_WC_LOC_LEN_ERR; + status = IB_WC_LOC_LEN_ERR; ack_err: - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; - ipath_sqerror_qp(qp, &wc); + ipath_send_complete(qp, wqe, status); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); +ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); bail: return; @@ -1523,13 +1482,12 @@ send_ack: return 0; } -static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) +void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) { unsigned long flags; int lastwqe; spin_lock_irqsave(&qp->s_lock, flags); - qp->state = IB_QPS_ERR; lastwqe = ipath_error_qp(qp, err); spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1643,11 +1601,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, opcode == OP(SEND_LAST) || opcode == OP(SEND_LAST_WITH_IMMEDIATE)) break; - nack_inv: - ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); - qp->r_nak_state = IB_NAK_INVALID_REQUEST; - qp->r_ack_psn = qp->r_psn; - goto send_ack; + goto nack_inv; case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_MIDDLE): @@ -1673,18 +1627,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, break; } - wc.imm_data = 0; - wc.wc_flags = 0; + memset(&wc, 0, sizeof wc); /* OK, process the packet. */ switch (opcode) { case OP(SEND_FIRST): - if (!ipath_get_rwqe(qp, 0)) { - rnr_nak: - qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; - qp->r_ack_psn = qp->r_psn; - goto send_ack; - } + if (!ipath_get_rwqe(qp, 0)) + goto rnr_nak; qp->r_rcv_len = 0; /* FALLTHROUGH */ case OP(SEND_MIDDLE): @@ -1751,14 +1700,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; else wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -1951,11 +1896,21 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto send_ack; goto done; +rnr_nak: + qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; + qp->r_ack_psn = qp->r_psn; + goto send_ack; + +nack_inv: + ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + qp->r_nak_state = IB_NAK_INVALID_REQUEST; + qp->r_ack_psn = qp->r_psn; + goto send_ack; + nack_acc: - ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); + ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; - send_ack: send_rc_ack(qp); diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 9e3fe61cbd08..c716a03dd399 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -140,20 +140,11 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, goto bail; bad_lkey: + memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr_id; wc.status = IB_WC_LOC_PROT_ERR; wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; /* Signal solicited completion event. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); ret = 0; @@ -270,6 +261,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) struct ib_wc wc; u64 sdata; atomic64_t *maddr; + enum ib_wc_status send_status; qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); if (!qp) { @@ -300,8 +292,8 @@ again: wqe = get_swqe_ptr(sqp, sqp->s_last); spin_unlock_irqrestore(&sqp->s_lock, flags); - wc.wc_flags = 0; - wc.imm_data = 0; + memset(&wc, 0, sizeof wc); + send_status = IB_WC_SUCCESS; sqp->s_sge.sge = wqe->sg_list[0]; sqp->s_sge.sg_list = wqe->sg_list + 1; @@ -313,75 +305,33 @@ again: wc.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: - if (!ipath_get_rwqe(qp, 0)) { - rnr_nak: - /* Handle RNR NAK */ - if (qp->ibqp.qp_type == IB_QPT_UC) - goto send_comp; - if (sqp->s_rnr_retry == 0) { - wc.status = IB_WC_RNR_RETRY_EXC_ERR; - goto err; - } - if (sqp->s_rnr_retry_cnt < 7) - sqp->s_rnr_retry--; - dev->n_rnr_naks++; - sqp->s_rnr_timeout = - ib_ipath_rnr_table[qp->r_min_rnr_timer]; - ipath_insert_rnr_queue(sqp); - goto done; - } + if (!ipath_get_rwqe(qp, 0)) + goto rnr_nak; break; case IB_WR_RDMA_WRITE_WITH_IMM: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; wc.imm_data = wqe->wr.ex.imm_data; if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; /* FALLTHROUGH */ case IB_WR_RDMA_WRITE: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; if (wqe->length == 0) break; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, - IB_ACCESS_REMOTE_WRITE))) { - acc_err: - wc.status = IB_WC_REM_ACCESS_ERR; - err: - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.qp = &sqp->ibqp; - wc.src_qp = sqp->remote_qpn; - wc.pkey_index = 0; - wc.slid = sqp->remote_ah_attr.dlid; - wc.sl = sqp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; - spin_lock_irqsave(&sqp->s_lock, flags); - ipath_sqerror_qp(sqp, &wc); - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; - } + IB_ACCESS_REMOTE_WRITE))) + goto acc_err; break; case IB_WR_RDMA_READ: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto inv_err; if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, @@ -394,11 +344,8 @@ again: case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + goto inv_err; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), wqe->wr.wr.atomic.remote_addr, wqe->wr.wr.atomic.rkey, @@ -415,7 +362,8 @@ again: goto send_comp; default: - goto done; + send_status = IB_WC_LOC_QP_OP_ERR; + goto serr; } sge = &sqp->s_sge.sge; @@ -458,14 +406,11 @@ again: wc.opcode = IB_WC_RECV; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; - wc.vendor_err = 0; wc.byte_len = wqe->length; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, @@ -473,9 +418,63 @@ again: send_comp: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; - ipath_send_complete(sqp, wqe, IB_WC_SUCCESS); + ipath_send_complete(sqp, wqe, send_status); goto again; +rnr_nak: + /* Handle RNR NAK */ + if (qp->ibqp.qp_type == IB_QPT_UC) + goto send_comp; + /* + * Note: we don't need the s_lock held since the BUSY flag + * makes this single threaded. + */ + if (sqp->s_rnr_retry == 0) { + send_status = IB_WC_RNR_RETRY_EXC_ERR; + goto serr; + } + if (sqp->s_rnr_retry_cnt < 7) + sqp->s_rnr_retry--; + spin_lock_irqsave(&sqp->s_lock, flags); + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; + dev->n_rnr_naks++; + sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; + ipath_insert_rnr_queue(sqp); + goto unlock; + +inv_err: + send_status = IB_WC_REM_INV_REQ_ERR; + wc.status = IB_WC_LOC_QP_OP_ERR; + goto err; + +acc_err: + send_status = IB_WC_REM_ACCESS_ERR; + wc.status = IB_WC_LOC_PROT_ERR; +err: + /* responder goes to error state */ + ipath_rc_error(qp, wc.status); + +serr: + spin_lock_irqsave(&sqp->s_lock, flags); + ipath_send_complete(sqp, wqe, send_status); + if (sqp->ibqp.qp_type == IB_QPT_RC) { + int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + + sqp->s_flags &= ~IPATH_S_BUSY; + spin_unlock_irqrestore(&sqp->s_lock, flags); + if (lastwqe) { + struct ib_event ev; + + ev.device = sqp->ibqp.device; + ev.element.qp = &sqp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); + } + goto done; + } +unlock: + spin_unlock_irqrestore(&sqp->s_lock, flags); done: if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); @@ -651,21 +650,15 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, status != IB_WC_SUCCESS) { struct ib_wc wc; + memset(&wc, 0, sizeof wc); wc.wr_id = wqe->wr.wr_id; wc.status = status; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = wqe->length; - wc.imm_data = 0; wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + if (status == IB_WC_SUCCESS) + wc.byte_len = wqe->length; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, + status != IB_WC_SUCCESS); } spin_lock_irqsave(&qp->s_lock, flags); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 5015cd2e57bd..22bb42dc8f73 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -744,12 +744,10 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) /* XXX What if timer fires again while this is running? */ for (qp = resend; qp != NULL; qp = qp->timer_next) { - struct ib_wc wc; - spin_lock_irqsave(&qp->s_lock, flags); if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { dev->n_timeouts++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1); } spin_unlock_irqrestore(&qp->s_lock, flags); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 6514aa8306cd..4c7c2aa8e19d 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -710,8 +710,6 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt); int ipath_init_qp_table(struct ipath_ibdev *idev, int size); -void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc); - void ipath_get_credit(struct ipath_qp *qp, u32 aeth); unsigned ipath_ib_rate_to_mult(enum ib_rate rate); @@ -729,7 +727,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int has_grh, void *data, u32 tlen, struct ipath_qp *qp); -void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc); +void ipath_restart_rc(struct ipath_qp *qp, u32 psn); + +void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err); int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); -- cgit v1.2.3 From e509be898d8937634437caa474b57ac12795e5bc Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 May 2008 11:41:29 -0700 Subject: IB/ipath: Fix many locking issues when switching to error state The send DMA hardware queue voided a number of prior assumptions about when a send is complete which led to completions being generated out of order. There were also a number of locking issues when switching the QP to the error or reset states, and we implement the IB_QPS_SQD state. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 183 +++++++++++++------------- drivers/infiniband/hw/ipath/ipath_rc.c | 151 +++++++++++++-------- drivers/infiniband/hw/ipath/ipath_ruc.c | 168 +++++++++++++++-------- drivers/infiniband/hw/ipath/ipath_uc.c | 57 +++++--- drivers/infiniband/hw/ipath/ipath_ud.c | 66 +++++++--- drivers/infiniband/hw/ipath/ipath_user_sdma.h | 2 - drivers/infiniband/hw/ipath/ipath_verbs.c | 174 ++++++++++++++++-------- drivers/infiniband/hw/ipath/ipath_verbs.h | 57 ++++++-- 8 files changed, 554 insertions(+), 304 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 6f98632877eb..4715911101e4 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) { struct ipath_qp *q, **qpp; unsigned long flags; - int fnd = 0; spin_lock_irqsave(&qpt->lock, flags); @@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) *qpp = qp->next; qp->next = NULL; atomic_dec(&qp->refcount); - fnd = 1; break; } } spin_unlock_irqrestore(&qpt->lock, flags); - - if (!fnd) - return; - - free_qpn(qpt, qp->ibqp.qp_num); - - wait_event(qp->wait, !atomic_read(&qp->refcount)); } /** - * ipath_free_all_qps - remove all QPs from the table + * ipath_free_all_qps - check for QPs still in use * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. */ -void ipath_free_all_qps(struct ipath_qp_table *qpt) +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt) { unsigned long flags; - struct ipath_qp *qp, *nqp; - u32 n; + struct ipath_qp *qp; + u32 n, qp_inuse = 0; + spin_lock_irqsave(&qpt->lock, flags); for (n = 0; n < qpt->max; n++) { - spin_lock_irqsave(&qpt->lock, flags); qp = qpt->table[n]; qpt->table[n] = NULL; - spin_unlock_irqrestore(&qpt->lock, flags); - - while (qp) { - nqp = qp->next; - free_qpn(qpt, qp->ibqp.qp_num); - if (!atomic_dec_and_test(&qp->refcount) || - !ipath_destroy_qp(&qp->ibqp)) - ipath_dbg("QP memory leak!\n"); - qp = nqp; - } + + for (; qp; qp = qp->next) + qp_inuse++; } + spin_unlock_irqrestore(&qpt->lock, flags); - for (n = 0; n < ARRAY_SIZE(qpt->map); n++) { + for (n = 0; n < ARRAY_SIZE(qpt->map); n++) if (qpt->map[n].page) - free_page((unsigned long)qpt->map[n].page); - } + free_page((unsigned long) qpt->map[n].page); + return qp_inuse; } /** @@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; - qp->s_busy = 0; + atomic_set(&qp->s_dma_busy, 0); qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; qp->s_pkt_delay = 0; + qp->s_draining = 0; qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; @@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) } qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; - qp->r_wrid_valid = 0; + qp->r_aflags = 0; + qp->r_flags = 0; qp->s_rnr_timeout = 0; qp->s_head = 0; qp->s_tail = 0; @@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; - qp->s_wait_credit = 0; memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; @@ -370,7 +359,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; } - qp->r_reuse_sge = 0; } /** @@ -402,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; + /* Schedule the sending tasklet to drain the send work queue. */ + if (qp->s_last != qp->s_head) + ipath_schedule_send(qp); + + memset(&wc, 0, sizeof(wc)); wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; - if (qp->r_wrid_valid) { - qp->r_wrid_valid = 0; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) { wc.wr_id = qp->r_wr_id; - wc.opcode = IB_WC_RECV; wc.status = err; ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); } wc.status = IB_WC_WR_FLUSH_ERR; - while (qp->s_last != qp->s_head) { - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); - } - qp->s_cur = qp->s_tail = qp->s_head; - qp->s_hdrwords = 0; - qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - if (qp->r_rq.wq) { struct ipath_rwq *wq; u32 head; @@ -450,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) tail = wq->tail; if (tail >= qp->r_rq.size) tail = 0; - wc.opcode = IB_WC_RECV; while (tail != head) { wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; if (++tail >= qp->r_rq.size) @@ -482,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_qp *qp = to_iqp(ibqp); enum ib_qp_state cur_state, new_state; - unsigned long flags; int lastwqe = 0; int ret; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock_irq(&qp->s_lock); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -539,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, switch (new_state) { case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + spin_lock_irq(&qp->s_lock); + } ipath_reset_qp(qp, ibqp->qp_type); break; + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + case IB_QPS_ERR: lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: + qp->state = new_state; break; - } if (attr_mask & IB_QP_PKEY_INDEX) @@ -601,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) qp->s_max_rd_atomic = attr->max_rd_atomic; - qp->state = new_state; - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irq(&qp->s_lock); if (lastwqe) { struct ib_event ev; @@ -616,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto bail; inval: - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irq(&qp->s_lock); ret = -EINVAL; bail: @@ -647,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->pkey_index = qp->s_pkey_index; attr->alt_pkey_index = 0; attr->en_sqd_async_notify = 0; - attr->sq_draining = 0; + attr->sq_draining = qp->s_draining; attr->max_rd_atomic = qp->s_max_rd_atomic; attr->max_dest_rd_atomic = qp->r_max_rd_atomic; attr->min_rnr_timer = qp->r_min_rnr_timer; @@ -837,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); + init_waitqueue_head(&qp->wait_dma); tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); INIT_LIST_HEAD(&qp->piowait); INIT_LIST_HEAD(&qp->timerwait); @@ -930,6 +924,7 @@ bail_ip: else vfree(qp->r_rq.wq); ipath_free_qp(&dev->qp_table, qp); + free_qpn(&dev->qp_table, qp->ibqp.qp_num); bail_qp: kfree(qp); bail_swq: @@ -951,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp) { struct ipath_qp *qp = to_iqp(ibqp); struct ipath_ibdev *dev = to_idev(ibqp->device); - unsigned long flags; - spin_lock_irqsave(&qp->s_lock, flags); - qp->state = IB_QPS_ERR; - spin_unlock_irqrestore(&qp->s_lock, flags); - spin_lock(&dev->n_qps_lock); - dev->n_qps_allocated--; - spin_unlock(&dev->n_qps_lock); + /* Make sure HW and driver activity is stopped. */ + spin_lock_irq(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + } else + spin_unlock_irq(&qp->s_lock); - /* Stop the sending tasklet. */ - tasklet_kill(&qp->s_task); + ipath_free_qp(&dev->qp_table, qp); if (qp->s_tx) { atomic_dec(&qp->refcount); if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) kfree(qp->s_tx->txreq.map_addr); + spin_lock_irq(&dev->pending_lock); + list_add(&qp->s_tx->txreq.list, &dev->txreq_free); + spin_unlock_irq(&dev->pending_lock); + qp->s_tx = NULL; } - /* Make sure the QP isn't on the timeout list. */ - spin_lock_irqsave(&dev->pending_lock, flags); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - if (qp->s_tx) - list_add(&qp->s_tx->txreq.list, &dev->txreq_free); - spin_unlock_irqrestore(&dev->pending_lock, flags); + wait_event(qp->wait, !atomic_read(&qp->refcount)); - /* - * Make sure that the QP is not in the QPN table so receive - * interrupts will discard packets for this QP. XXX Also remove QP - * from multicast table. - */ - if (atomic_read(&qp->refcount) != 0) - ipath_free_qp(&dev->qp_table, qp); + /* all user's cleaned up, mark it available */ + free_qpn(&dev->qp_table, qp->ibqp.qp_num); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); if (qp->ip) kref_put(&qp->ip->ref, ipath_release_mmap_info); @@ -1055,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth) } /* Restart sending if it was blocked due to lack of credits. */ - if (qp->s_cur != qp->s_head && + if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) && + qp->s_cur != qp->s_head && (qp->s_lsn == (u32) -1 || ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, qp->s_lsn + 1) <= 0)) - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); } diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index b4b26c3aa613..5b5276a270bc 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, u32 bth0; u32 bth2; + /* Don't send an ACK if we aren't supposed to. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto bail; + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; @@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_make_rc_ack(dev, qp, ohdr, pmtu)) goto done; - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || - qp->s_rnr_timeout || qp->s_wait_credit) - goto bail; + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } - /* Limit the number of packets sent without an ACK. */ - if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { - qp->s_wait_credit = 1; - dev->n_rc_stalls++; + /* Leave BUSY set until RNR timeout. */ + if (qp->s_rnr_timeout) { + qp->s_flags |= IPATH_S_WAITING; goto bail; } @@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp) wqe = get_swqe_ptr(qp, qp->s_cur); switch (qp->s_state) { default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; /* * Resend an old request or start a new one. * @@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) case IB_WR_SEND_WITH_IMM: /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; goto bail; + } wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; goto bail; + } ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->wr.wr.rdma.remote_addr); ohdr->u.rc.reth.rkey = @@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); done: ret = 1; + goto unlock; + bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp) spin_unlock_irqrestore(&qp->s_lock, flags); + /* Don't try to send ACKs if the link isn't ACTIVE */ dd = dev->dd; + if (!(dd->ipath_flags & IPATH_LINKACTIVE)) + goto done; + piobuf = ipath_getpiobuf(dd, 0, NULL); if (!piobuf) { /* @@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp) goto done; queue_ack: - dev->n_rc_qacks++; - qp->s_flags |= IPATH_S_ACK_PENDING; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) { + dev->n_rc_qacks++; + qp->s_flags |= IPATH_S_ACK_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); + } spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); - done: return; } @@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn) /* * Set the state to restart in the middle of a request. * Don't change the s_sge, s_cur_sge, or s_cur_size. - * See ipath_do_rc_send(). + * See ipath_make_rc_req(). */ switch (opcode) { case IB_WR_SEND: @@ -801,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn) dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); bail: return; @@ -809,13 +840,7 @@ bail: static inline void update_last_psn(struct ipath_qp *qp, u32 psn) { - if (qp->s_last_psn != psn) { - qp->s_last_psn = psn; - if (qp->s_wait_credit) { - qp->s_wait_credit = 0; - tasklet_hi_schedule(&qp->s_task); - } - } + qp->s_last_psn = psn; } /** @@ -915,14 +940,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { qp->s_num_rd_atomic--; /* Restart sending task if fence is complete */ - if ((qp->s_flags & IPATH_S_FENCE_PENDING) && - !qp->s_num_rd_atomic) { - qp->s_flags &= ~IPATH_S_FENCE_PENDING; - tasklet_hi_schedule(&qp->s_task); - } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { - qp->s_flags &= ~IPATH_S_RDMAR_PENDING; - tasklet_hi_schedule(&qp->s_task); - } + if (((qp->s_flags & IPATH_S_FENCE_PENDING) && + !qp->s_num_rd_atomic) || + qp->s_flags & IPATH_S_RDMAR_PENDING) + ipath_schedule_send(qp); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || @@ -956,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, } else { if (++qp->s_last >= qp->s_size) qp->s_last = 0; + if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) + qp->s_draining = 0; if (qp->s_last == qp->s_tail) break; wqe = get_swqe_ptr(qp, qp->s_last); @@ -979,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, */ if (ipath_cmp24(qp->s_psn, psn) <= 0) { reset_psn(qp, psn + 1); - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); } } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { qp->s_state = OP(SEND_LAST); @@ -1018,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK]; ipath_insert_rnr_queue(qp); + ipath_schedule_send(qp); goto bail; case 3: /* NAK */ @@ -1108,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto ack_done; + /* Ignore invalid responses. */ if (ipath_cmp24(psn, qp->s_next_psn) >= 0) goto ack_done; @@ -1343,7 +1371,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, psn &= IPATH_PSN_MASK; e = NULL; old_req = 1; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock_done; + for (i = qp->r_head_ack_queue; ; i = prev) { if (i == qp->s_tail_ack_queue) old_req = 0; @@ -1471,7 +1504,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, break; } qp->r_nak_state = 0; - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); unlock_done: spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1503,18 +1536,15 @@ void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) { - unsigned long flags; unsigned next; next = n + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - spin_lock_irqsave(&qp->s_lock, flags); if (n == qp->s_tail_ack_queue) { qp->s_tail_ack_queue = next; qp->s_ack_state = OP(ACKNOWLEDGE); } - spin_unlock_irqrestore(&qp->s_lock, flags); } /** @@ -1543,6 +1573,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int diff; struct ib_reth *reth; int header_in_data; + unsigned long flags; /* Validate the SLID. See Ch. 9.6.1.5 */ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) @@ -1690,9 +1721,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv; ipath_copy_sge(&qp->r_sge, data, tlen); qp->r_msn++; - if (!qp->r_wrid_valid) + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) break; - qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || @@ -1764,9 +1794,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) - goto nack_inv; + goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; @@ -1787,7 +1821,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) - goto nack_acc; + goto nack_acc_unlck; /* * Update the next expected PSN. We add 1 later * below, so only add the remainder here. @@ -1814,13 +1848,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; - barrier(); qp->r_head_ack_queue = next; - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); - goto done; + goto unlock; } case OP(COMPARE_SWAP): @@ -1839,9 +1872,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) - goto nack_inv; + goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } if (!header_in_data) @@ -1851,13 +1888,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | be32_to_cpu(ateth->vaddr[1]); if (unlikely(vaddr & (sizeof(u64) - 1))) - goto nack_inv; + goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc; + goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; sdata = be64_to_cpu(ateth->swap_data); @@ -1874,13 +1911,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; - barrier(); qp->r_head_ack_queue = next; - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); - goto done; + goto unlock; } default: @@ -1901,19 +1937,26 @@ rnr_nak: qp->r_ack_psn = qp->r_psn; goto send_ack; +nack_inv_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); nack_inv: ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); qp->r_nak_state = IB_NAK_INVALID_REQUEST; qp->r_ack_psn = qp->r_psn; goto send_ack; +nack_acc_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); nack_acc: ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; send_ack: send_rc_ack(qp); + goto done; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); done: return; } diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index c716a03dd399..a4b5521567fe 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = { * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device * @qp: the QP * + * Called with the QP s_lock held and interrupts disabled. * XXX Use a simple list for now. We might need a priority * queue if we have lots of QPs waiting for RNR timeouts * but that should be rare. @@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = { void ipath_insert_rnr_queue(struct ipath_qp *qp) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned long flags; - spin_lock_irqsave(&dev->pending_lock, flags); + /* We already did a spin_lock_irqsave(), so just use spin_lock */ + spin_lock(&dev->pending_lock); if (list_empty(&dev->rnrwait)) list_add(&qp->timerwait, &dev->rnrwait); else { @@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) nqp->s_rnr_timeout -= qp->s_rnr_timeout; list_add(&qp->timerwait, l); } - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock(&dev->pending_lock); } /** @@ -185,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) } spin_lock_irqsave(&rq->lock, flags); + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + ret = 0; + goto unlock; + } + wq = rq->wq; tail = wq->tail; /* Validate tail before using it since it is user writable. */ @@ -192,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) tail = 0; do { if (unlikely(tail == wq->head)) { - spin_unlock_irqrestore(&rq->lock, flags); ret = 0; - goto bail; + goto unlock; } /* Make sure entry is read after head index is read. */ smp_rmb(); @@ -207,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) wq->tail = tail; ret = 1; - qp->r_wrid_valid = 1; + set_bit(IPATH_R_WRID_VALID, &qp->r_aflags); if (handler) { u32 n; @@ -234,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) goto bail; } } +unlock: spin_unlock_irqrestore(&rq->lock, flags); - bail: return ret; } @@ -263,35 +268,59 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) atomic64_t *maddr; enum ib_wc_status send_status; + /* + * Note that we check the responder QP state after + * checking the requester's state. + */ qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); - if (!qp) { - dev->n_pkt_drops++; - return; - } -again: spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || - sqp->s_rnr_timeout) { - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; - } + /* Return if we are already busy processing a work request. */ + if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + goto unlock; - /* Get the next send request. */ - if (sqp->s_last == sqp->s_head) { - /* Send work queue is empty. */ - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; + sqp->s_flags |= IPATH_S_BUSY; + +again: + if (sqp->s_last == sqp->s_head) + goto clr_busy; + wqe = get_swqe_ptr(sqp, sqp->s_last); + + /* Return if it is not OK to start a new work reqeust. */ + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND)) + goto clr_busy; + /* We are in the error state, flush the work request. */ + send_status = IB_WC_WR_FLUSH_ERR; + goto flush_send; } /* * We can rely on the entry not changing without the s_lock * being held until we update s_last. + * We increment s_cur to indicate s_last is in progress. */ - wqe = get_swqe_ptr(sqp, sqp->s_last); + if (sqp->s_last == sqp->s_cur) { + if (++sqp->s_cur >= sqp->s_size) + sqp->s_cur = 0; + } spin_unlock_irqrestore(&sqp->s_lock, flags); + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + dev->n_pkt_drops++; + /* + * For RC, the requester would timeout and retry so + * shortcut the timeouts and just signal too many retries. + */ + if (sqp->ibqp.qp_type == IB_QPT_RC) + send_status = IB_WC_RETRY_EXC_ERR; + else + send_status = IB_WC_SUCCESS; + goto serr; + } + memset(&wc, 0, sizeof wc); send_status = IB_WC_SUCCESS; @@ -396,8 +425,7 @@ again: sqp->s_len -= len; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE || - wqe->wr.opcode == IB_WR_RDMA_READ) + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) goto send_comp; if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) @@ -417,6 +445,8 @@ again: wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: + spin_lock_irqsave(&sqp->s_lock, flags); +flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; ipath_send_complete(sqp, wqe, send_status); goto again; @@ -437,11 +467,12 @@ rnr_nak: sqp->s_rnr_retry--; spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) - goto unlock; + goto clr_busy; + sqp->s_flags |= IPATH_S_WAITING; dev->n_rnr_naks++; sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; ipath_insert_rnr_queue(sqp); - goto unlock; + goto clr_busy; inv_err: send_status = IB_WC_REM_INV_REQ_ERR; @@ -473,17 +504,19 @@ serr: } goto done; } +clr_busy: + sqp->s_flags &= ~IPATH_S_BUSY; unlock: spin_unlock_irqrestore(&sqp->s_lock, flags); done: - if (atomic_dec_and_test(&qp->refcount)) + if (qp && atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) { if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || - qp->ibqp.qp_type == IB_QPT_SMI) { + qp->ibqp.qp_type == IB_QPT_SMI) { unsigned long flags; spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); @@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) * @dev: the device we ran out of buffers on * * Called when we run out of PIO buffers. + * If we are now in the error state, return zero to flush the + * send work request. */ -static void ipath_no_bufs_available(struct ipath_qp *qp, +static int ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) { unsigned long flags; + int ret = 1; /* * Note that as soon as want_buffer() is called and * possibly before it returns, ipath_ib_piobufavail() - * could be called. If we are still in the tasklet function, - * tasklet_hi_schedule() will not call us until the next time - * tasklet_hi_schedule() is called. - * We leave the busy flag set so that another post send doesn't - * try to put the same QP on the piowait list again. + * could be called. Therefore, put QP on the piowait list before + * enabling the PIO avail interrupt. */ - spin_lock_irqsave(&dev->pending_lock, flags); - list_add_tail(&qp->piowait, &dev->piowait); - spin_unlock_irqrestore(&dev->pending_lock, flags); - want_buffer(dev->dd, qp); - dev->n_piowait++; + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { + dev->n_piowait++; + qp->s_flags |= IPATH_S_WAITING; + qp->s_flags &= ~IPATH_S_BUSY; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->piowait)) + list_add_tail(&qp->piowait, &dev->piowait); + spin_unlock(&dev->pending_lock); + } else + ret = 0; + spin_unlock_irqrestore(&qp->s_lock, flags); + if (ret) + want_buffer(dev->dd, qp); + return ret; } /** @@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data) struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); int (*make_req)(struct ipath_qp *qp); - - if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) - goto bail; + unsigned long flags; if ((qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) && qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { ipath_ruc_loopback(qp); - goto clear; + goto bail; } if (qp->ibqp.qp_type == IB_QPT_RC) @@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data) else make_req = ipath_make_ud_req; + spin_lock_irqsave(&qp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + goto bail; + } + + qp->s_flags |= IPATH_S_BUSY; + + spin_unlock_irqrestore(&qp->s_lock, flags); + again: /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { @@ -623,8 +677,8 @@ again: */ if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) { - ipath_no_bufs_available(qp, dev); - goto bail; + if (ipath_no_bufs_available(qp, dev)) + goto bail; } dev->n_unicast_xmit++; /* Record that we sent the packet and s_hdr is empty. */ @@ -633,16 +687,20 @@ again: if (make_req(qp)) goto again; -clear: - clear_bit(IPATH_S_BUSY, &qp->s_busy); + bail:; } +/* + * This should be called with s_lock held. + */ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, enum ib_wc_status status) { - unsigned long flags; - u32 last; + u32 old_last, last; + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + return; /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || @@ -661,10 +719,14 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, status != IB_WC_SUCCESS); } - spin_lock_irqsave(&qp->s_lock, flags); - last = qp->s_last; + old_last = last = qp->s_last; if (++last >= qp->s_size) last = 0; qp->s_last = last; - spin_unlock_irqrestore(&qp->s_lock, flags); + if (qp->s_cur == old_last) + qp->s_cur = last; + if (qp->s_tail == old_last) + qp->s_tail = last; + if (qp->state == IB_QPS_SQD && last == qp->s_cur) + qp->s_draining = 0; } diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index bfe8926b5514..7fd18e833907 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp) { struct ipath_other_headers *ohdr; struct ipath_swqe *wqe; + unsigned long flags; u32 hwords; u32 bth0; u32 len; u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); int ret = 0; - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); goto done; + } ohdr = &qp->s_hdr.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) @@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp) qp->s_wqe = NULL; switch (qp->s_state) { default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; /* Check if send work queue is empty. */ if (qp->s_cur == qp->s_head) - goto done; + goto bail; /* * Start a new request. */ @@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp) break; default: - goto done; + goto bail; } break; @@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp) ipath_make_ruc_header(to_idev(qp->ibqp.device), qp, ohdr, bth0 | (qp->s_state << 24), qp->s_next_psn++ & IPATH_PSN_MASK); +done: ret = 1; + goto unlock; -done: +bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, */ opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - wc.imm_data = 0; - wc.wc_flags = 0; + memset(&wc, 0, sizeof wc); /* Compare the PSN verses the expected PSN. */ if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { @@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): send_first: - if (qp->r_reuse_sge) { - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) { + qp->r_flags &= ~IPATH_R_REUSE_SGE; qp->r_sge = qp->s_rdma_read_sge; } else if (!ipath_get_rwqe(qp, 0)) { dev->n_pkt_drops++; @@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ if (unlikely(tlen != (hdrsize + pmtu + 4))) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* Check for invalid length. */ /* XXX LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, tlen -= (hdrsize + pad + 4); wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, dev->n_pkt_drops++; goto done; } - if (qp->r_reuse_sge) - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) + qp->r_flags &= ~IPATH_R_REUSE_SGE; else if (!ipath_get_rwqe(qp, 1)) { dev->n_pkt_drops++; goto done; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 8b6a261c89e3..77ca8ca74e78 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) u32 length; qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); - if (!qp) { + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { dev->n_pkt_drops++; - goto send_comp; + goto done; } rsge.sg_list = NULL; @@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) * present on the wire. */ length = swqe->length; + memset(&wc, 0, sizeof wc); wc.byte_len = length + sizeof(struct ib_grh); if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { wc.wc_flags = IB_WC_WITH_IMM; wc.imm_data = swqe->wr.ex.imm_data; - } else { - wc.wc_flags = 0; - wc.imm_data = 0; } /* @@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) } wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = sqp->ibqp.qp_num; /* XXX do we know which pkey matched? Only needed for GSI. */ @@ -248,8 +245,7 @@ drop: kfree(rsge.sg_list); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); -send_comp: - ipath_send_complete(sqp, swqe, IB_WC_SUCCESS); +done:; } /** @@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) struct ipath_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct ipath_swqe *wqe; + unsigned long flags; u32 nwords; u32 extra_bytes; u32 bth0; @@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp) u16 lid; int ret = 0; - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))) - goto bail; + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } if (qp->s_cur == qp->s_head) goto bail; wqe = get_swqe_ptr(qp, qp->s_cur); + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; /* Construct the header. */ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; @@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp) dev->n_unicast_xmit++; } else { dev->n_unicast_xmit++; - lid = ah_attr->dlid & - ~((1 << dev->dd->ipath_lmc) - 1); + lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1); if (unlikely(lid == dev->dd->ipath_lid)) { + /* + * If DMAs are in progress, we can't generate + * a completion for the loopback packet since + * it would be out of order. + * XXX Instead of waiting, we could queue a + * zero length descriptor so we get a callback. + */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + spin_unlock_irqrestore(&qp->s_lock, flags); ipath_ud_loopback(qp, wqe); + spin_lock_irqsave(&qp->s_lock, flags); + ipath_send_complete(qp, wqe, IB_WC_SUCCESS); goto done; } } @@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp) ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; ret = 1; + goto unlock; bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* * Get the next work request entry to find where to put the data. */ - if (qp->r_reuse_sge) - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) + qp->r_flags &= ~IPATH_R_REUSE_SGE; else if (!ipath_get_rwqe(qp, 0)) { /* * Count VL15 packets dropped due to no receive buffer. @@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, } /* Silently drop packets which are too big. */ if (wc.byte_len > qp->r_len) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto bail; } @@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh)); - qp->r_wrid_valid = 0; + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) + goto bail; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h index e70946c1428c..fc76316c4a58 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h @@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd, int ipath_user_sdma_make_progress(struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq); -int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq, - u32 counter); void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 22bb42dc8f73..e0ec540042bf 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -111,16 +111,24 @@ static unsigned int ib_ipath_disable_sma; module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(disable_sma, "Disable the SMA"); +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; ipath_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. + */ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = 0, [IB_QPS_INIT] = IPATH_POST_RECV_OK, [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK | + IPATH_PROCESS_NEXT_SEND_OK, [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK, - [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, - [IB_QPS_ERR] = 0, + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, + [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, + [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV | + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, }; struct ipath_ucontext { @@ -230,18 +238,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) } } -static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr) -{ - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wr->wr_id; - wc.status = IB_WC_WR_FLUSH_ERR; - wc.opcode = ib_ipath_wc_opcode[wr->opcode]; - wc.qp = &qp->ibqp; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); -} - /* * Count the number of DMA descriptors needed to send length bytes of data. * Don't modify the ipath_sge_state to get the count. @@ -347,14 +343,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) spin_lock_irqsave(&qp->s_lock, flags); /* Check that state is OK to post send. */ - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) { - if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR) - goto bail_inval; - /* C10-96 says generate a flushed completion entry. */ - ipath_flush_wqe(qp, wr); - ret = 0; - goto bail; - } + if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) + goto bail_inval; /* IB spec says that num_sge == 0 is OK. */ if (wr->num_sge > qp->s_max_sge) @@ -677,6 +667,7 @@ bail:; static void ipath_ib_timer(struct ipath_ibdev *dev) { struct ipath_qp *resend = NULL; + struct ipath_qp *rnr = NULL; struct list_head *last; struct ipath_qp *qp; unsigned long flags; @@ -703,7 +694,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) if (--qp->s_rnr_timeout == 0) { do { list_del_init(&qp->timerwait); - tasklet_hi_schedule(&qp->s_task); + qp->timer_next = rnr; + rnr = qp; + atomic_inc(&qp->refcount); if (list_empty(last)) break; qp = list_entry(last->next, struct ipath_qp, @@ -743,14 +736,31 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) spin_unlock_irqrestore(&dev->pending_lock, flags); /* XXX What if timer fires again while this is running? */ - for (qp = resend; qp != NULL; qp = qp->timer_next) { + while (resend != NULL) { + qp = resend; + resend = qp->timer_next; + spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { + if (qp->s_last != qp->s_tail && + ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { dev->n_timeouts++; ipath_restart_rc(qp, qp->s_last_psn + 1); } spin_unlock_irqrestore(&qp->s_lock, flags); + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + while (rnr != NULL) { + qp = rnr; + rnr = qp->timer_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + /* Notify ipath_destroy_qp() if it is waiting. */ if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); @@ -1010,13 +1020,24 @@ static void sdma_complete(void *cookie, int status) struct ipath_verbs_txreq *tx = cookie; struct ipath_qp *qp = tx->qp; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + unsigned int flags; + enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - /* Generate a completion queue entry if needed */ - if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { - enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? - IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - + if (atomic_dec_and_test(&qp->s_dma_busy)) { + spin_lock_irqsave(&qp->s_lock, flags); + if (tx->wqe) + ipath_send_complete(qp, tx->wqe, ibs); + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && + qp->s_last != qp->s_head) || + (qp->s_flags & IPATH_S_WAIT_DMA)) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + wake_up(&qp->wait_dma); + } else if (tx->wqe) { + spin_lock_irqsave(&qp->s_lock, flags); ipath_send_complete(qp, tx->wqe, ibs); + spin_unlock_irqrestore(&qp->s_lock, flags); } if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) @@ -1027,6 +1048,21 @@ static void sdma_complete(void *cookie, int status) wake_up(&qp->wait); } +static void decrement_dma_busy(struct ipath_qp *qp) +{ + unsigned int flags; + + if (atomic_dec_and_test(&qp->s_dma_busy)) { + spin_lock_irqsave(&qp->s_lock, flags); + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && + qp->s_last != qp->s_head) || + (qp->s_flags & IPATH_S_WAIT_DMA)) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + wake_up(&qp->wait_dma); + } +} + /* * Compute the number of clock cycles of delay before sending the next packet. * The multipliers reflect the number of clocks for the fastest rate so @@ -1065,9 +1101,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, if (tx) { qp->s_tx = NULL; /* resend previously constructed packet */ + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); - if (ret) + if (ret) { qp->s_tx = tx; + decrement_dma_busy(qp); + } goto bail; } @@ -1118,12 +1157,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, tx->txreq.sg_count = ndesc; tx->map_len = (hdrwords + 2) << 2; tx->txreq.map_addr = &tx->hdr; + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); if (ret) { /* save ss and length in dwords */ tx->ss = ss; tx->len = dwords; qp->s_tx = tx; + decrement_dma_busy(qp); } goto bail; } @@ -1144,6 +1185,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, memcpy(piobuf, hdr, hdrwords << 2); ipath_copy_from_sge(piobuf + hdrwords, ss, len); + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); /* * If we couldn't queue the DMA request, save the info @@ -1154,6 +1196,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, tx->ss = NULL; tx->len = 0; qp->s_tx = tx; + decrement_dma_busy(qp); } dev->n_unaligned++; goto bail; @@ -1177,6 +1220,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, unsigned flush_wc; u32 control; int ret; + unsigned int flags; piobuf = ipath_getpiobuf(dd, plen, NULL); if (unlikely(piobuf == NULL)) { @@ -1247,8 +1291,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, } copy_io(piobuf, ss, len, flush_wc); done: - if (qp->s_wqe) + if (qp->s_wqe) { + spin_lock_irqsave(&qp->s_lock, flags); ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); + spin_unlock_irqrestore(&qp->s_lock, flags); + } ret = 0; bail: return ret; @@ -1281,19 +1328,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, * can defer SDMA restart until link goes ACTIVE without * worrying about just how we got there. */ - if (qp->ibqp.qp_type == IB_QPT_SMI) + if (qp->ibqp.qp_type == IB_QPT_SMI || + !(dd->ipath_flags & IPATH_HAS_SEND_DMA)) ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, plen, dwords); - /* All non-VL15 packets are dropped if link is not ACTIVE */ - else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) { - if (qp->s_wqe) - ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); - ret = 0; - } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, - plen, dwords); else - ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, + ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, plen, dwords); return ret; @@ -1401,27 +1441,46 @@ bail: * This is called from ipath_intr() at interrupt level when a PIO buffer is * available after ipath_verbs_send() returned an error that no buffers were * available. Return 1 if we consumed all the PIO buffers and we still have - * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and + * QPs waiting for buffers (for now, just restart the send tasklet and * return zero). */ int ipath_ib_piobufavail(struct ipath_ibdev *dev) { + struct list_head *list; + struct ipath_qp *qplist; struct ipath_qp *qp; unsigned long flags; if (dev == NULL) goto bail; + list = &dev->piowait; + qplist = NULL; + spin_lock_irqsave(&dev->pending_lock, flags); - while (!list_empty(&dev->piowait)) { - qp = list_entry(dev->piowait.next, struct ipath_qp, - piowait); + while (!list_empty(list)) { + qp = list_entry(list->next, struct ipath_qp, piowait); list_del_init(&qp->piowait); - clear_bit(IPATH_S_BUSY, &qp->s_busy); - tasklet_hi_schedule(&qp->s_task); + qp->pio_next = qplist; + qplist = qp; + atomic_inc(&qp->refcount); } spin_unlock_irqrestore(&dev->pending_lock, flags); + while (qplist != NULL) { + qp = qplist; + qplist = qp->pio_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + bail: return 0; } @@ -2143,11 +2202,12 @@ bail: void ipath_unregister_ib_device(struct ipath_ibdev *dev) { struct ib_device *ibdev = &dev->ibdev; - - disable_timer(dev->dd); + u32 qps_inuse; ib_unregister_device(ibdev); + disable_timer(dev->dd); + if (!list_empty(&dev->pending[0]) || !list_empty(&dev->pending[1]) || !list_empty(&dev->pending[2])) @@ -2162,7 +2222,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev) * Note that ipath_unregister_ib_device() can be called before all * the QPs are destroyed! */ - ipath_free_all_qps(&dev->qp_table); + qps_inuse = ipath_free_all_qps(&dev->qp_table); + if (qps_inuse) + ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n", + qps_inuse); kfree(dev->qp_table.table); kfree(dev->lk_table.table); kfree(dev->txreq_bufs); @@ -2213,17 +2276,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr, "RC OTH NAKs %d\n" "RC timeouts %d\n" "RC RDMA dup %d\n" - "RC stalls %d\n" "piobuf wait %d\n" - "no piobuf %d\n" "unaligned %d\n" "PKT drops %d\n" "WQE errs %d\n", dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, - dev->n_no_piobuf, dev->n_unaligned, + dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned, dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 4c7c2aa8e19d..deee02ca7ca4 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -74,6 +74,11 @@ #define IPATH_POST_RECV_OK 0x02 #define IPATH_PROCESS_RECV_OK 0x04 #define IPATH_PROCESS_SEND_OK 0x08 +#define IPATH_PROCESS_NEXT_SEND_OK 0x10 +#define IPATH_FLUSH_SEND 0x20 +#define IPATH_FLUSH_RECV 0x40 +#define IPATH_PROCESS_OR_FLUSH_SEND \ + (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND) /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 @@ -353,12 +358,14 @@ struct ipath_qp { struct ib_qp ibqp; struct ipath_qp *next; /* link list for QPN hash table */ struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ + struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */ struct list_head piowait; /* link for wait PIO buf */ struct list_head timerwait; /* link for waiting for timeouts */ struct ib_ah_attr remote_ah_attr; struct ipath_ib_header s_hdr; /* next packet header to send */ atomic_t refcount; wait_queue_head_t wait; + wait_queue_head_t wait_dma; struct tasklet_struct s_task; struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; @@ -369,7 +376,7 @@ struct ipath_qp { struct ipath_sge_state s_rdma_read_sge; struct ipath_sge_state r_sge; /* current receive data */ spinlock_t s_lock; - unsigned long s_busy; + atomic_t s_dma_busy; u16 s_pkt_delay; u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u32 s_cur_size; /* size of send packet in bytes */ @@ -383,6 +390,7 @@ struct ipath_qp { u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u64 r_wr_id; /* ID for current receive WQE */ + unsigned long r_aflags; u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ u32 r_psn; /* expected rcv packet sequence number */ @@ -394,8 +402,7 @@ struct ipath_qp { u8 r_state; /* opcode of last packet received */ u8 r_nak_state; /* non-zero if NAK is pending */ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 r_reuse_sge; /* for UC receive errors */ - u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ + u8 r_flags; u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ u8 r_head_ack_queue; /* index into s_ack_queue[] */ u8 qp_access_flags; @@ -404,13 +411,13 @@ struct ipath_qp { u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ u8 s_tail_ack_queue; /* index into s_ack_queue[] */ u8 s_flags; u8 s_dmult; + u8 s_draining; u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; u32 remote_qpn; @@ -428,16 +435,39 @@ struct ipath_qp { struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; -/* Bit definition for s_busy. */ -#define IPATH_S_BUSY 0 +/* + * Atomic bit definitions for r_aflags. + */ +#define IPATH_R_WRID_VALID 0 + +/* + * Bit definitions for r_flags. + */ +#define IPATH_R_REUSE_SGE 0x01 /* * Bit definitions for s_flags. + * + * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs + * before processing the next SWQE + * IPATH_S_WAITING - waiting for RNR timeout or send buffer available. + * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA. */ #define IPATH_S_SIGNAL_REQ_WR 0x01 #define IPATH_S_FENCE_PENDING 0x02 #define IPATH_S_RDMAR_PENDING 0x04 #define IPATH_S_ACK_PENDING 0x08 +#define IPATH_S_BUSY 0x10 +#define IPATH_S_WAITING 0x20 +#define IPATH_S_WAIT_SSN_CREDIT 0x40 +#define IPATH_S_WAIT_DMA 0x80 + +#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \ + IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA) #define IPATH_PSN_CREDIT 512 @@ -573,13 +603,11 @@ struct ipath_ibdev { u32 n_rnr_naks; u32 n_other_naks; u32 n_timeouts; - u32 n_rc_stalls; u32 n_pkt_drops; u32 n_vl15_dropped; u32 n_wqe_errs; u32 n_rdma_dup_busy; u32 n_piowait; - u32 n_no_piobuf; u32 n_unaligned; u32 port_cap_flags; u32 pma_sample_start; @@ -657,6 +685,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev) return container_of(ibdev, struct ipath_ibdev, ibdev); } +/* + * This must be called with s_lock held. + */ +static inline void ipath_schedule_send(struct ipath_qp *qp) +{ + if (qp->s_flags & IPATH_S_ANY_WAIT) + qp->s_flags &= ~IPATH_S_ANY_WAIT; + if (!(qp->s_flags & IPATH_S_BUSY)) + tasklet_hi_schedule(&qp->s_task); +} + int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -706,7 +745,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); -void ipath_free_all_qps(struct ipath_qp_table *qpt); +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt); int ipath_init_qp_table(struct ipath_ibdev *idev, int size); -- cgit v1.2.3 From 74116f580b7279543340dd716a2af642f5c1c2c7 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 May 2008 11:42:20 -0700 Subject: IB/ipath: Fix RDMA read response sequence checking If an out of sequence RDMA read response middle or last packet is received, we should only resend the RDMA read request on the first out of sequence packet and drop subsequent out of sequence packets otherwise, we get "too many retries". Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 7 +++++++ drivers/infiniband/hw/ipath/ipath_verbs.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5b5276a270bc..108df667d2ee 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1189,6 +1189,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, wqe = get_swqe_ptr(qp, qp->s_last); if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; + qp->r_flags &= ~IPATH_R_RDMAR_SEQ; /* * If this is a response to a resent RDMA read, we * have to be careful to copy the data to the right @@ -1202,6 +1203,9 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* no AETH, no ACK */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; + if (qp->r_flags & IPATH_R_RDMAR_SEQ) + goto ack_done; + qp->r_flags |= IPATH_R_RDMAR_SEQ; ipath_restart_rc(qp, qp->s_last_psn + 1); goto ack_done; } @@ -1263,6 +1267,9 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* ACKs READ req. */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; + if (qp->r_flags & IPATH_R_RDMAR_SEQ) + goto ack_done; + qp->r_flags |= IPATH_R_RDMAR_SEQ; ipath_restart_rc(qp, qp->s_last_psn + 1); goto ack_done; } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index deee02ca7ca4..9d12ae8a778e 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -444,6 +444,7 @@ struct ipath_qp { * Bit definitions for r_flags. */ #define IPATH_R_REUSE_SGE 0x01 +#define IPATH_R_RDMAR_SEQ 0x02 /* * Bit definitions for s_flags. -- cgit v1.2.3 From 40d97692fbfe52ef68fa771d8121394b2210fd67 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 13 May 2008 11:45:32 -0700 Subject: IB/ipath: Make ipath_portdata work with struct pid * not pid_t The official reason is "with the presence of pid namespaces in the kernel using pid_t-s inside one is no longer safe." But the reason I fix this right now is the following: About a month ago (when 2.6.25 was not yet released) there still was a one last caller of a to-be-deprecated-soon function find_pid() - the kill_proc() function, which in turn was only used by nfs callback code. During the last merge window, this last caller was finally eliminated by some NFS patch(es) and I was about to finally kill this kill_proc() and find_pid(), but found, that I was late and the kill_proc is now called from the ipath driver since commit 58411d1c ("IB/ipath: Head of Line blocking vs forward progress of user apps"). So here's a patch that fixes this code to use struct pid * and (!) the kill_pid routine. Signed-off-by: Pavel Emelyanov Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 18 ++++++++++-------- drivers/infiniband/hw/ipath/ipath_file_ops.c | 19 +++++++++++-------- drivers/infiniband/hw/ipath/ipath_kernel.h | 4 ++-- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index ce7b7c34360e..258e66cf3546 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -2616,7 +2616,7 @@ int ipath_reset_device(int unit) ipath_dbg("unit %u port %d is in use " "(PID %u cmd %s), can't reset\n", unit, i, - dd->ipath_pd[i]->port_pid, + pid_nr(dd->ipath_pd[i]->port_pid), dd->ipath_pd[i]->port_comm); ret = -EBUSY; goto bail; @@ -2654,19 +2654,21 @@ bail: static int ipath_signal_procs(struct ipath_devdata *dd, int sig) { int i, sub, any = 0; - pid_t pid; + struct pid *pid; if (!dd->ipath_pd) return 0; for (i = 1; i < dd->ipath_cfgports; i++) { - if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt || - !dd->ipath_pd[i]->port_pid) + if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) continue; pid = dd->ipath_pd[i]->port_pid; + if (!pid) + continue; + dev_info(&dd->pcidev->dev, "context %d in use " "(PID %u), sending signal %d\n", - i, pid, sig); - kill_proc(pid, sig, 1); + i, pid_nr(pid), sig); + kill_pid(pid, sig, 1); any++; for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { pid = dd->ipath_pd[i]->port_subpid[sub]; @@ -2674,8 +2676,8 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig) continue; dev_info(&dd->pcidev->dev, "sub-context " "%d:%d in use (PID %u), sending " - "signal %d\n", i, sub, pid, sig); - kill_proc(pid, sig, 1); + "signal %d\n", i, sub, pid_nr(pid), sig); + kill_pid(pid, sig, 1); any++; } } diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 3295177c937e..b472b15637f0 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -555,7 +555,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, p = dd->ipath_pageshadow[porttid + tid]; dd->ipath_pageshadow[porttid + tid] = NULL; ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", - pd->port_pid, tid); + pid_nr(pd->port_pid), tid); dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED, dd->ipath_tidinvalid); @@ -1609,7 +1609,7 @@ static int try_alloc_port(struct ipath_devdata *dd, int port, port); pd->port_cnt = 1; port_fp(fp) = pd; - pd->port_pid = current->pid; + pd->port_pid = get_pid(task_pid(current)); strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); ipath_stats.sps_ports++; ret = 0; @@ -1793,14 +1793,15 @@ static int find_shared_port(struct file *fp, } port_fp(fp) = pd; subport_fp(fp) = pd->port_cnt++; - pd->port_subpid[subport_fp(fp)] = current->pid; + pd->port_subpid[subport_fp(fp)] = + get_pid(task_pid(current)); tidcursor_fp(fp) = 0; pd->active_slaves |= 1 << subport_fp(fp); ipath_cdbg(PROC, "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", current->comm, current->pid, subport_fp(fp), - pd->port_comm, pd->port_pid, + pd->port_comm, pid_nr(pd->port_pid), dd->ipath_unit, pd->port_port); ret = 1; goto done; @@ -2066,7 +2067,8 @@ static int ipath_close(struct inode *in, struct file *fp) * the slave(s) don't wait for receive data forever. */ pd->active_slaves &= ~(1 << fd->subport); - pd->port_subpid[fd->subport] = 0; + put_pid(pd->port_subpid[fd->subport]); + pd->port_subpid[fd->subport] = NULL; mutex_unlock(&ipath_mutex); goto bail; } @@ -2074,7 +2076,7 @@ static int ipath_close(struct inode *in, struct file *fp) if (pd->port_hdrqfull) { ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " - "during run\n", pd->port_comm, pd->port_pid, + "during run\n", pd->port_comm, pid_nr(pd->port_pid), pd->port_hdrqfull); pd->port_hdrqfull = 0; } @@ -2134,11 +2136,12 @@ static int ipath_close(struct inode *in, struct file *fp) unlock_expected_tids(pd); ipath_stats.sps_ports--; ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", - pd->port_comm, pd->port_pid, + pd->port_comm, pid_nr(pd->port_pid), dd->ipath_unit, port); } - pd->port_pid = 0; + put_pid(pd->port_pid); + pd->port_pid = NULL; dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ mutex_unlock(&ipath_mutex); ipath_free_pddata(dd, pd); /* after releasing the mutex */ diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 02b24a340599..20975875a8d1 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -159,8 +159,8 @@ struct ipath_portdata { /* saved total number of polled urgent packets for poll edge trigger */ u32 port_urgent_poll; /* pid of process using this port */ - pid_t port_pid; - pid_t port_subpid[INFINIPATH_MAX_SUBPORT]; + struct pid *port_pid; + struct pid *port_subpid[INFINIPATH_MAX_SUBPORT]; /* same size as task_struct .comm[] */ char port_comm[16]; /* pkeys set by this use of this port */ -- cgit v1.2.3 From f018c7e177a50390f6fcb137f1a28a6027d8ba50 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 13 May 2008 11:51:23 -0700 Subject: IB/ipath: Change ipath_devdata.ipath_sdma_status to be unsigned long Andrew Morton pointed out that bitops should take an unsigned long * arg. However, the ipath driver was doing bitops on struct ipath_devdata.ipath_sdma_status, which is u64. Change this member to unsigned long to avoid tons of warnings when x86 fixes the bitops to take unsigned long * instead of void *. Also, change the IPATH_SDMA_RUNNING and IPATH_SDMA_SHUTDOWN bit numbers to 30 and 31 (instead of 62 and 63) so that we're not setting another booby trap for someone who tries to make ipath work on a 32-bit architecture. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 2 +- drivers/infiniband/hw/ipath/ipath_kernel.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 258e66cf3546..daad09a45910 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1894,7 +1894,7 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) */ if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { int skip_cancel; - u64 *statp = &dd->ipath_sdma_status; + unsigned long *statp = &dd->ipath_sdma_status; spin_lock_irqsave(&dd->ipath_sdma_lock, flags); skip_cancel = diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 20975875a8d1..59a8b254b97f 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -483,7 +483,7 @@ struct ipath_devdata { /* SendDMA related entries */ spinlock_t ipath_sdma_lock; - u64 ipath_sdma_status; + unsigned long ipath_sdma_status; unsigned long ipath_sdma_abort_jiffies; unsigned long ipath_sdma_abort_intr_timeout; unsigned long ipath_sdma_buf_jiffies; @@ -822,8 +822,8 @@ struct ipath_devdata { #define IPATH_SDMA_DISARMED 1 #define IPATH_SDMA_DISABLED 2 #define IPATH_SDMA_LAYERBUF 3 -#define IPATH_SDMA_RUNNING 62 -#define IPATH_SDMA_SHUTDOWN 63 +#define IPATH_SDMA_RUNNING 30 +#define IPATH_SDMA_SHUTDOWN 31 /* bit combinations that correspond to abort states */ #define IPATH_SDMA_ABORT_NONE 0 -- cgit v1.2.3 From a58e58fafdff4c25949221e46132e86f709d0b79 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 13 May 2008 11:52:55 -0700 Subject: RDMA/cxgb3: Wrap the software send queue pointer as needed on flush cxio_flush_sq() was failing to wrap around the software send queue causing garbage completion entries on a flush operation. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index ebf9d3043f80..3f441fc57c17 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -405,11 +405,11 @@ int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); ptr = wq->sq_rptr + count; - sqp += count; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); while (ptr != wq->sq_wptr) { insert_sq_cqe(wq, cq, sqp); - sqp++; ptr++; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); flushed++; } return flushed; -- cgit v1.2.3