Преглед изворни кода

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  RDMA/cxgb3: Wrap the software send queue pointer as needed on flush
  IB/ipath: Change ipath_devdata.ipath_sdma_status to be unsigned long
  IB/ipath: Make ipath_portdata work with struct pid * not pid_t
  IB/ipath: Fix RDMA read response sequence checking
  IB/ipath: Fix many locking issues when switching to error state
  IB/ipath: Fix RC and UC error handling
  RDMA/nes: Fix up nes_lro_max_aggr module parameter
Linus Torvalds пре 17 година
родитељ
комит
dd286422fe

+ 2 - 2
drivers/infiniband/hw/cxgb3/cxio_hal.c

@@ -405,11 +405,11 @@ int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
 	struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
 
 	ptr = wq->sq_rptr + count;
-	sqp += count;
+	sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
 	while (ptr != wq->sq_wptr) {
 		insert_sq_cqe(wq, cq, sqp);
-		sqp++;
 		ptr++;
+		sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
 		flushed++;
 	}
 	return flushed;

+ 11 - 9
drivers/infiniband/hw/ipath/ipath_driver.c

@@ -1894,7 +1894,7 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
 	 */
 	if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
 		int skip_cancel;
-		u64 *statp = &dd->ipath_sdma_status;
+		unsigned long *statp = &dd->ipath_sdma_status;
 
 		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
 		skip_cancel =
@@ -2616,7 +2616,7 @@ int ipath_reset_device(int unit)
 				ipath_dbg("unit %u port %d is in use "
 					  "(PID %u cmd %s), can't reset\n",
 					  unit, i,
-					  dd->ipath_pd[i]->port_pid,
+					  pid_nr(dd->ipath_pd[i]->port_pid),
 					  dd->ipath_pd[i]->port_comm);
 				ret = -EBUSY;
 				goto bail;
@@ -2654,19 +2654,21 @@ bail:
 static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
 {
 	int i, sub, any = 0;
-	pid_t pid;
+	struct pid *pid;
 
 	if (!dd->ipath_pd)
 		return 0;
 	for (i = 1; i < dd->ipath_cfgports; i++) {
-		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt ||
-		    !dd->ipath_pd[i]->port_pid)
+		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
 			continue;
 		pid = dd->ipath_pd[i]->port_pid;
+		if (!pid)
+			continue;
+
 		dev_info(&dd->pcidev->dev, "context %d in use "
 			  "(PID %u), sending signal %d\n",
-			  i, pid, sig);
-		kill_proc(pid, sig, 1);
+			  i, pid_nr(pid), sig);
+		kill_pid(pid, sig, 1);
 		any++;
 		for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
 			pid = dd->ipath_pd[i]->port_subpid[sub];
@@ -2674,8 +2676,8 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
 				continue;
 			dev_info(&dd->pcidev->dev, "sub-context "
 				"%d:%d in use (PID %u), sending "
-				"signal %d\n", i, sub, pid, sig);
-			kill_proc(pid, sig, 1);
+				"signal %d\n", i, sub, pid_nr(pid), sig);
+			kill_pid(pid, sig, 1);
 			any++;
 		}
 	}

+ 11 - 8
drivers/infiniband/hw/ipath/ipath_file_ops.c

@@ -555,7 +555,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
 			p = dd->ipath_pageshadow[porttid + tid];
 			dd->ipath_pageshadow[porttid + tid] = NULL;
 			ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
-				   pd->port_pid, tid);
+				   pid_nr(pd->port_pid), tid);
 			dd->ipath_f_put_tid(dd, &tidbase[tid],
 					    RCVHQ_RCV_TYPE_EXPECTED,
 					    dd->ipath_tidinvalid);
@@ -1609,7 +1609,7 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
 			   port);
 		pd->port_cnt = 1;
 		port_fp(fp) = pd;
-		pd->port_pid = current->pid;
+		pd->port_pid = get_pid(task_pid(current));
 		strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
 		ipath_stats.sps_ports++;
 		ret = 0;
@@ -1793,14 +1793,15 @@ static int find_shared_port(struct file *fp,
 			}
 			port_fp(fp) = pd;
 			subport_fp(fp) = pd->port_cnt++;
-			pd->port_subpid[subport_fp(fp)] = current->pid;
+			pd->port_subpid[subport_fp(fp)] =
+				get_pid(task_pid(current));
 			tidcursor_fp(fp) = 0;
 			pd->active_slaves |= 1 << subport_fp(fp);
 			ipath_cdbg(PROC,
 				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
 				   current->comm, current->pid,
 				   subport_fp(fp),
-				   pd->port_comm, pd->port_pid,
+				   pd->port_comm, pid_nr(pd->port_pid),
 				   dd->ipath_unit, pd->port_port);
 			ret = 1;
 			goto done;
@@ -2066,7 +2067,8 @@ static int ipath_close(struct inode *in, struct file *fp)
 		 * the slave(s) don't wait for receive data forever.
 		 */
 		pd->active_slaves &= ~(1 << fd->subport);
-		pd->port_subpid[fd->subport] = 0;
+		put_pid(pd->port_subpid[fd->subport]);
+		pd->port_subpid[fd->subport] = NULL;
 		mutex_unlock(&ipath_mutex);
 		goto bail;
 	}
@@ -2074,7 +2076,7 @@ static int ipath_close(struct inode *in, struct file *fp)
 
 	if (pd->port_hdrqfull) {
 		ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
-			   "during run\n", pd->port_comm, pd->port_pid,
+			   "during run\n", pd->port_comm, pid_nr(pd->port_pid),
 			   pd->port_hdrqfull);
 		pd->port_hdrqfull = 0;
 	}
@@ -2134,11 +2136,12 @@ static int ipath_close(struct inode *in, struct file *fp)
 			unlock_expected_tids(pd);
 		ipath_stats.sps_ports--;
 		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
-			   pd->port_comm, pd->port_pid,
+			   pd->port_comm, pid_nr(pd->port_pid),
 			   dd->ipath_unit, port);
 	}
 
-	pd->port_pid = 0;
+	put_pid(pd->port_pid);
+	pd->port_pid = NULL;
 	dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
 	mutex_unlock(&ipath_mutex);
 	ipath_free_pddata(dd, pd); /* after releasing the mutex */

+ 5 - 5
drivers/infiniband/hw/ipath/ipath_kernel.h

@@ -159,8 +159,8 @@ struct ipath_portdata {
 	/* saved total number of polled urgent packets for poll edge trigger */
 	u32 port_urgent_poll;
 	/* pid of process using this port */
-	pid_t port_pid;
-	pid_t port_subpid[INFINIPATH_MAX_SUBPORT];
+	struct pid *port_pid;
+	struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
 	/* same size as task_struct .comm[] */
 	char port_comm[16];
 	/* pkeys set by this use of this port */
@@ -483,7 +483,7 @@ struct ipath_devdata {
 
 	/* SendDMA related entries */
 	spinlock_t            ipath_sdma_lock;
-	u64                   ipath_sdma_status;
+	unsigned long         ipath_sdma_status;
 	unsigned long         ipath_sdma_abort_jiffies;
 	unsigned long         ipath_sdma_abort_intr_timeout;
 	unsigned long         ipath_sdma_buf_jiffies;
@@ -822,8 +822,8 @@ struct ipath_devdata {
 #define IPATH_SDMA_DISARMED  1
 #define IPATH_SDMA_DISABLED  2
 #define IPATH_SDMA_LAYERBUF  3
-#define IPATH_SDMA_RUNNING  62
-#define IPATH_SDMA_SHUTDOWN 63
+#define IPATH_SDMA_RUNNING  30
+#define IPATH_SDMA_SHUTDOWN 31
 
 /* bit combinations that correspond to abort states */
 #define IPATH_SDMA_ABORT_NONE 0

+ 99 - 138
drivers/infiniband/hw/ipath/ipath_qp.c

@@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
 {
 	struct ipath_qp *q, **qpp;
 	unsigned long flags;
-	int fnd = 0;
 
 	spin_lock_irqsave(&qpt->lock, flags);
 
@@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
 			*qpp = qp->next;
 			qp->next = NULL;
 			atomic_dec(&qp->refcount);
-			fnd = 1;
 			break;
 		}
 	}
 
 	spin_unlock_irqrestore(&qpt->lock, flags);
-
-	if (!fnd)
-		return;
-
-	free_qpn(qpt, qp->ibqp.qp_num);
-
-	wait_event(qp->wait, !atomic_read(&qp->refcount));
 }
 
 /**
- * ipath_free_all_qps - remove all QPs from the table
+ * ipath_free_all_qps - check for QPs still in use
  * @qpt: the QP table to empty
+ *
+ * There should not be any QPs still in use.
+ * Free memory for table.
  */
-void ipath_free_all_qps(struct ipath_qp_table *qpt)
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
 {
 	unsigned long flags;
-	struct ipath_qp *qp, *nqp;
-	u32 n;
+	struct ipath_qp *qp;
+	u32 n, qp_inuse = 0;
 
+	spin_lock_irqsave(&qpt->lock, flags);
 	for (n = 0; n < qpt->max; n++) {
-		spin_lock_irqsave(&qpt->lock, flags);
 		qp = qpt->table[n];
 		qpt->table[n] = NULL;
-		spin_unlock_irqrestore(&qpt->lock, flags);
-
-		while (qp) {
-			nqp = qp->next;
-			free_qpn(qpt, qp->ibqp.qp_num);
-			if (!atomic_dec_and_test(&qp->refcount) ||
-			    !ipath_destroy_qp(&qp->ibqp))
-				ipath_dbg("QP memory leak!\n");
-			qp = nqp;
-		}
+
+		for (; qp; qp = qp->next)
+			qp_inuse++;
 	}
+	spin_unlock_irqrestore(&qpt->lock, flags);
 
-	for (n = 0; n < ARRAY_SIZE(qpt->map); n++) {
+	for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
 		if (qpt->map[n].page)
-			free_page((unsigned long)qpt->map[n].page);
-	}
+			free_page((unsigned long) qpt->map[n].page);
+	return qp_inuse;
 }
 
 /**
@@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
 	qp->remote_qpn = 0;
 	qp->qkey = 0;
 	qp->qp_access_flags = 0;
-	qp->s_busy = 0;
+	atomic_set(&qp->s_dma_busy, 0);
 	qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
 	qp->s_hdrwords = 0;
 	qp->s_wqe = NULL;
 	qp->s_pkt_delay = 0;
+	qp->s_draining = 0;
 	qp->s_psn = 0;
 	qp->r_psn = 0;
 	qp->r_msn = 0;
@@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
 	}
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 	qp->r_nak_state = 0;
-	qp->r_wrid_valid = 0;
+	qp->r_aflags = 0;
+	qp->r_flags = 0;
 	qp->s_rnr_timeout = 0;
 	qp->s_head = 0;
 	qp->s_tail = 0;
@@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
 	qp->s_last = 0;
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
-	qp->s_wait_credit = 0;
 	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
 	qp->r_head_ack_queue = 0;
 	qp->s_tail_ack_queue = 0;
@@ -370,17 +359,17 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
 		qp->r_rq.wq->head = 0;
 		qp->r_rq.wq->tail = 0;
 	}
-	qp->r_reuse_sge = 0;
 }
 
 /**
- * ipath_error_qp - put a QP into an error state
- * @qp: the QP to put into an error state
+ * ipath_error_qp - put a QP into the error state
+ * @qp: the QP to put into the error state
  * @err: the receive completion error to signal if a RWQE is active
  *
  * Flushes both send and receive work queues.
  * Returns true if last WQE event should be generated.
  * The QP s_lock should be held and interrupts disabled.
+ * If we are already in error state, just return.
  */
 
 int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -389,8 +378,10 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 	struct ib_wc wc;
 	int ret = 0;
 
-	ipath_dbg("QP%d/%d in error state (%d)\n",
-		  qp->ibqp.qp_num, qp->remote_qpn, err);
+	if (qp->state == IB_QPS_ERR)
+		goto bail;
+
+	qp->state = IB_QPS_ERR;
 
 	spin_lock(&dev->pending_lock);
 	if (!list_empty(&qp->timerwait))
@@ -399,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 		list_del_init(&qp->piowait);
 	spin_unlock(&dev->pending_lock);
 
-	wc.vendor_err = 0;
-	wc.byte_len = 0;
-	wc.imm_data = 0;
+	/* Schedule the sending tasklet to drain the send work queue. */
+	if (qp->s_last != qp->s_head)
+		ipath_schedule_send(qp);
+
+	memset(&wc, 0, sizeof(wc));
 	wc.qp = &qp->ibqp;
-	wc.src_qp = 0;
-	wc.wc_flags = 0;
-	wc.pkey_index = 0;
-	wc.slid = 0;
-	wc.sl = 0;
-	wc.dlid_path_bits = 0;
-	wc.port_num = 0;
-	if (qp->r_wrid_valid) {
-		qp->r_wrid_valid = 0;
+	wc.opcode = IB_WC_RECV;
+
+	if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
 		wc.wr_id = qp->r_wr_id;
-		wc.opcode = IB_WC_RECV;
 		wc.status = err;
 		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
 	}
 	wc.status = IB_WC_WR_FLUSH_ERR;
 
-	while (qp->s_last != qp->s_head) {
-		struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
-		wc.wr_id = wqe->wr.wr_id;
-		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
-		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
-	}
-	qp->s_cur = qp->s_tail = qp->s_head;
-	qp->s_hdrwords = 0;
-	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-
 	if (qp->r_rq.wq) {
 		struct ipath_rwq *wq;
 		u32 head;
@@ -447,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 		tail = wq->tail;
 		if (tail >= qp->r_rq.size)
 			tail = 0;
-		wc.opcode = IB_WC_RECV;
 		while (tail != head) {
 			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
 			if (++tail >= qp->r_rq.size)
@@ -460,6 +432,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 	} else if (qp->ibqp.event_handler)
 		ret = 1;
 
+bail:
 	return ret;
 }
 
@@ -478,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	struct ipath_qp *qp = to_iqp(ibqp);
 	enum ib_qp_state cur_state, new_state;
-	unsigned long flags;
 	int lastwqe = 0;
 	int ret;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
+	spin_lock_irq(&qp->s_lock);
 
 	cur_state = attr_mask & IB_QP_CUR_STATE ?
 		attr->cur_qp_state : qp->state;
@@ -535,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 	switch (new_state) {
 	case IB_QPS_RESET:
+		if (qp->state != IB_QPS_RESET) {
+			qp->state = IB_QPS_RESET;
+			spin_lock(&dev->pending_lock);
+			if (!list_empty(&qp->timerwait))
+				list_del_init(&qp->timerwait);
+			if (!list_empty(&qp->piowait))
+				list_del_init(&qp->piowait);
+			spin_unlock(&dev->pending_lock);
+			qp->s_flags &= ~IPATH_S_ANY_WAIT;
+			spin_unlock_irq(&qp->s_lock);
+			/* Stop the sending tasklet */
+			tasklet_kill(&qp->s_task);
+			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+			spin_lock_irq(&qp->s_lock);
+		}
 		ipath_reset_qp(qp, ibqp->qp_type);
 		break;
 
+	case IB_QPS_SQD:
+		qp->s_draining = qp->s_last != qp->s_cur;
+		qp->state = new_state;
+		break;
+
+	case IB_QPS_SQE:
+		if (qp->ibqp.qp_type == IB_QPT_RC)
+			goto inval;
+		qp->state = new_state;
+		break;
+
 	case IB_QPS_ERR:
 		lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 		break;
 
 	default:
+		qp->state = new_state;
 		break;
-
 	}
 
 	if (attr_mask & IB_QP_PKEY_INDEX)
@@ -597,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
 		qp->s_max_rd_atomic = attr->max_rd_atomic;
 
-	qp->state = new_state;
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_unlock_irq(&qp->s_lock);
 
 	if (lastwqe) {
 		struct ib_event ev;
@@ -612,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	goto bail;
 
 inval:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_unlock_irq(&qp->s_lock);
 	ret = -EINVAL;
 
 bail:
@@ -643,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	attr->pkey_index = qp->s_pkey_index;
 	attr->alt_pkey_index = 0;
 	attr->en_sqd_async_notify = 0;
-	attr->sq_draining = 0;
+	attr->sq_draining = qp->s_draining;
 	attr->max_rd_atomic = qp->s_max_rd_atomic;
 	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
 	attr->min_rnr_timer = qp->r_min_rnr_timer;
@@ -833,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		spin_lock_init(&qp->r_rq.lock);
 		atomic_set(&qp->refcount, 0);
 		init_waitqueue_head(&qp->wait);
+		init_waitqueue_head(&qp->wait_dma);
 		tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
 		INIT_LIST_HEAD(&qp->piowait);
 		INIT_LIST_HEAD(&qp->timerwait);
@@ -926,6 +924,7 @@ bail_ip:
 	else
 		vfree(qp->r_rq.wq);
 	ipath_free_qp(&dev->qp_table, qp);
+	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
 bail_qp:
 	kfree(qp);
 bail_swq:
@@ -947,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 {
 	struct ipath_qp *qp = to_iqp(ibqp);
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
-	unsigned long flags;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-	qp->state = IB_QPS_ERR;
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
+	/* Make sure HW and driver activity is stopped. */
+	spin_lock_irq(&qp->s_lock);
+	if (qp->state != IB_QPS_RESET) {
+		qp->state = IB_QPS_RESET;
+		spin_lock(&dev->pending_lock);
+		if (!list_empty(&qp->timerwait))
+			list_del_init(&qp->timerwait);
+		if (!list_empty(&qp->piowait))
+			list_del_init(&qp->piowait);
+		spin_unlock(&dev->pending_lock);
+		qp->s_flags &= ~IPATH_S_ANY_WAIT;
+		spin_unlock_irq(&qp->s_lock);
+		/* Stop the sending tasklet */
+		tasklet_kill(&qp->s_task);
+		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+	} else
+		spin_unlock_irq(&qp->s_lock);
 
-	/* Stop the sending tasklet. */
-	tasklet_kill(&qp->s_task);
+	ipath_free_qp(&dev->qp_table, qp);
 
 	if (qp->s_tx) {
 		atomic_dec(&qp->refcount);
 		if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
 			kfree(qp->s_tx->txreq.map_addr);
+		spin_lock_irq(&dev->pending_lock);
+		list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
+		spin_unlock_irq(&dev->pending_lock);
+		qp->s_tx = NULL;
 	}
 
-	/* Make sure the QP isn't on the timeout list. */
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	if (!list_empty(&qp->timerwait))
-		list_del_init(&qp->timerwait);
-	if (!list_empty(&qp->piowait))
-		list_del_init(&qp->piowait);
-	if (qp->s_tx)
-		list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	wait_event(qp->wait, !atomic_read(&qp->refcount));
 
-	/*
-	 * Make sure that the QP is not in the QPN table so receive
-	 * interrupts will discard packets for this QP.  XXX Also remove QP
-	 * from multicast table.
-	 */
-	if (atomic_read(&qp->refcount) != 0)
-		ipath_free_qp(&dev->qp_table, qp);
+	/* all user's cleaned up, mark it available */
+	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+	spin_lock(&dev->n_qps_lock);
+	dev->n_qps_allocated--;
+	spin_unlock(&dev->n_qps_lock);
 
 	if (qp->ip)
 		kref_put(&qp->ip->ref, ipath_release_mmap_info);
@@ -1025,48 +1027,6 @@ bail:
 	return ret;
 }
 
-/**
- * ipath_sqerror_qp - put a QP's send queue into an error state
- * @qp: QP who's send queue will be put into an error state
- * @wc: the WC responsible for putting the QP in this state
- *
- * Flushes the send work queue.
- * The QP s_lock should be held and interrupts disabled.
- */
-
-void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
-	ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
-		  qp->ibqp.qp_num, qp->remote_qpn, wc->status);
-
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->timerwait))
-		list_del_init(&qp->timerwait);
-	if (!list_empty(&qp->piowait))
-		list_del_init(&qp->piowait);
-	spin_unlock(&dev->pending_lock);
-
-	ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
-	if (++qp->s_last >= qp->s_size)
-		qp->s_last = 0;
-
-	wc->status = IB_WC_WR_FLUSH_ERR;
-
-	while (qp->s_last != qp->s_head) {
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		wc->wr_id = wqe->wr.wr_id;
-		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
-	}
-	qp->s_cur = qp->s_tail = qp->s_head;
-	qp->state = IB_QPS_SQE;
-}
-
 /**
  * ipath_get_credit - flush the send work queue of a QP
  * @qp: the qp who's send work queue to flush
@@ -1093,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
 	}
 
 	/* Restart sending if it was blocked due to lack of credits. */
-	if (qp->s_cur != qp->s_head &&
+	if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
+	    qp->s_cur != qp->s_head &&
 	    (qp->s_lsn == (u32) -1 ||
 	     ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
 			 qp->s_lsn + 1) <= 0))
-		tasklet_hi_schedule(&qp->s_task);
+		ipath_schedule_send(qp);
 }

+ 145 - 140
drivers/infiniband/hw/ipath/ipath_rc.c

@@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
 	u32 bth0;
 	u32 bth2;
 
+	/* Don't send an ACK if we aren't supposed to. */
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+		goto bail;
+
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
 
@@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 	    ipath_make_rc_ack(dev, qp, ohdr, pmtu))
 		goto done;
 
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
-	    qp->s_rnr_timeout || qp->s_wait_credit)
-		goto bail;
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+			goto bail;
+		/* We are in the error state, flush the work request. */
+		if (qp->s_last == qp->s_head)
+			goto bail;
+		/* If DMAs are in progress, we can't flush immediately. */
+		if (atomic_read(&qp->s_dma_busy)) {
+			qp->s_flags |= IPATH_S_WAIT_DMA;
+			goto bail;
+		}
+		wqe = get_swqe_ptr(qp, qp->s_last);
+		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+		goto done;
+	}
 
-	/* Limit the number of packets sent without an ACK. */
-	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
-		qp->s_wait_credit = 1;
-		dev->n_rc_stalls++;
+	/* Leave BUSY set until RNR timeout. */
+	if (qp->s_rnr_timeout) {
+		qp->s_flags |= IPATH_S_WAITING;
 		goto bail;
 	}
 
@@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 	wqe = get_swqe_ptr(qp, qp->s_cur);
 	switch (qp->s_state) {
 	default:
+		if (!(ib_ipath_state_ops[qp->state] &
+		    IPATH_PROCESS_NEXT_SEND_OK))
+			goto bail;
 		/*
 		 * Resend an old request or start a new one.
 		 *
@@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 		case IB_WR_SEND_WITH_IMM:
 			/* If no credit, return. */
 			if (qp->s_lsn != (u32) -1 &&
-			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
+			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+				qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
 				goto bail;
+			}
 			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
 				wqe->lpsn += (len - 1) / pmtu;
@@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			/* If no credit, return. */
 			if (qp->s_lsn != (u32) -1 &&
-			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
+			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+				qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
 				goto bail;
+			}
 			ohdr->u.rc.reth.vaddr =
 				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
 			ohdr->u.rc.reth.rkey =
@@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 	ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
 done:
 	ret = 1;
+	goto unlock;
+
 bail:
+	qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
@@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp)
 
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
+	/* Don't try to send ACKs if the link isn't ACTIVE */
 	dd = dev->dd;
+	if (!(dd->ipath_flags & IPATH_LINKACTIVE))
+		goto done;
+
 	piobuf = ipath_getpiobuf(dd, 0, NULL);
 	if (!piobuf) {
 		/*
@@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp)
 	goto done;
 
 queue_ack:
-	dev->n_rc_qacks++;
-	qp->s_flags |= IPATH_S_ACK_PENDING;
-	qp->s_nak_state = qp->r_nak_state;
-	qp->s_ack_psn = qp->r_ack_psn;
+	if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
+		dev->n_rc_qacks++;
+		qp->s_flags |= IPATH_S_ACK_PENDING;
+		qp->s_nak_state = qp->r_nak_state;
+		qp->s_ack_psn = qp->r_ack_psn;
+
+		/* Schedule the send tasklet. */
+		ipath_schedule_send(qp);
+	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-	/* Call ipath_do_rc_send() in another thread. */
-	tasklet_hi_schedule(&qp->s_task);
-
 done:
 	return;
 }
@@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
 	/*
 	 * Set the state to restart in the middle of a request.
 	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
-	 * See ipath_do_rc_send().
+	 * See ipath_make_rc_req().
 	 */
 	switch (opcode) {
 	case IB_WR_SEND:
@@ -771,27 +802,14 @@ done:
  *
  * The QP s_lock should be held and interrupts disabled.
  */
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
 {
 	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
 	struct ipath_ibdev *dev;
 
 	if (qp->s_retry == 0) {
-		wc->wr_id = wqe->wr.wr_id;
-		wc->status = IB_WC_RETRY_EXC_ERR;
-		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		wc->vendor_err = 0;
-		wc->byte_len = 0;
-		wc->qp = &qp->ibqp;
-		wc->imm_data = 0;
-		wc->src_qp = qp->remote_qpn;
-		wc->wc_flags = 0;
-		wc->pkey_index = 0;
-		wc->slid = qp->remote_ah_attr.dlid;
-		wc->sl = qp->remote_ah_attr.sl;
-		wc->dlid_path_bits = 0;
-		wc->port_num = 0;
-		ipath_sqerror_qp(qp, wc);
+		ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+		ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 		goto bail;
 	}
 	qp->s_retry--;
@@ -804,6 +822,8 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
 	spin_lock(&dev->pending_lock);
 	if (!list_empty(&qp->timerwait))
 		list_del_init(&qp->timerwait);
+	if (!list_empty(&qp->piowait))
+		list_del_init(&qp->piowait);
 	spin_unlock(&dev->pending_lock);
 
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
@@ -812,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
 		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
 
 	reset_psn(qp, psn);
-	tasklet_hi_schedule(&qp->s_task);
+	ipath_schedule_send(qp);
 
 bail:
 	return;
@@ -820,13 +840,7 @@ bail:
 
 static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
 {
-	if (qp->s_last_psn != psn) {
-		qp->s_last_psn = psn;
-		if (qp->s_wait_credit) {
-			qp->s_wait_credit = 0;
-			tasklet_hi_schedule(&qp->s_task);
-		}
-	}
+	qp->s_last_psn = psn;
 }
 
 /**
@@ -845,6 +859,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 {
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;
+	enum ib_wc_status status;
 	struct ipath_swqe *wqe;
 	int ret = 0;
 	u32 ack_psn;
@@ -909,7 +924,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 			 */
 			update_last_psn(qp, wqe->psn - 1);
 			/* Retry this request. */
-			ipath_restart_rc(qp, wqe->psn, &wc);
+			ipath_restart_rc(qp, wqe->psn);
 			/*
 			 * No need to process the ACK/NAK since we are
 			 * restarting an earlier request.
@@ -925,32 +940,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
 			qp->s_num_rd_atomic--;
 			/* Restart sending task if fence is complete */
-			if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
-			    !qp->s_num_rd_atomic) {
-				qp->s_flags &= ~IPATH_S_FENCE_PENDING;
-				tasklet_hi_schedule(&qp->s_task);
-			} else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
-				qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
-				tasklet_hi_schedule(&qp->s_task);
-			}
+			if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+			     !qp->s_num_rd_atomic) ||
+			    qp->s_flags & IPATH_S_RDMAR_PENDING)
+				ipath_schedule_send(qp);
 		}
 		/* Post a send completion queue entry if requested. */
 		if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+			memset(&wc, 0, sizeof wc);
 			wc.wr_id = wqe->wr.wr_id;
 			wc.status = IB_WC_SUCCESS;
 			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-			wc.vendor_err = 0;
 			wc.byte_len = wqe->length;
-			wc.imm_data = 0;
 			wc.qp = &qp->ibqp;
 			wc.src_qp = qp->remote_qpn;
-			wc.wc_flags = 0;
-			wc.pkey_index = 0;
 			wc.slid = qp->remote_ah_attr.dlid;
 			wc.sl = qp->remote_ah_attr.sl;
-			wc.dlid_path_bits = 0;
-			wc.port_num = 0;
 			ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
 		}
 		qp->s_retry = qp->s_retry_cnt;
@@ -971,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 		} else {
 			if (++qp->s_last >= qp->s_size)
 				qp->s_last = 0;
+			if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
+				qp->s_draining = 0;
 			if (qp->s_last == qp->s_tail)
 				break;
 			wqe = get_swqe_ptr(qp, qp->s_last);
@@ -994,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 			 */
 			if (ipath_cmp24(qp->s_psn, psn) <= 0) {
 				reset_psn(qp, psn + 1);
-				tasklet_hi_schedule(&qp->s_task);
+				ipath_schedule_send(qp);
 			}
 		} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
 			qp->s_state = OP(SEND_LAST);
@@ -1012,7 +1020,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 		if (qp->s_last == qp->s_tail)
 			goto bail;
 		if (qp->s_rnr_retry == 0) {
-			wc.status = IB_WC_RNR_RETRY_EXC_ERR;
+			status = IB_WC_RNR_RETRY_EXC_ERR;
 			goto class_b;
 		}
 		if (qp->s_rnr_retry_cnt < 7)
@@ -1033,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 			ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
 					   IPATH_AETH_CREDIT_MASK];
 		ipath_insert_rnr_queue(qp);
+		ipath_schedule_send(qp);
 		goto bail;
 
 	case 3:		/* NAK */
@@ -1050,37 +1059,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
 			 * RDMA READ response which terminates the RDMA
 			 * READ.
 			 */
-			ipath_restart_rc(qp, psn, &wc);
+			ipath_restart_rc(qp, psn);
 			break;
 
 		case 1:	/* Invalid Request */
-			wc.status = IB_WC_REM_INV_REQ_ERR;
+			status = IB_WC_REM_INV_REQ_ERR;
 			dev->n_other_naks++;
 			goto class_b;
 
 		case 2:	/* Remote Access Error */
-			wc.status = IB_WC_REM_ACCESS_ERR;
+			status = IB_WC_REM_ACCESS_ERR;
 			dev->n_other_naks++;
 			goto class_b;
 
 		case 3:	/* Remote Operation Error */
-			wc.status = IB_WC_REM_OP_ERR;
+			status = IB_WC_REM_OP_ERR;
 			dev->n_other_naks++;
 		class_b:
-			wc.wr_id = wqe->wr.wr_id;
-			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-			wc.vendor_err = 0;
-			wc.byte_len = 0;
-			wc.qp = &qp->ibqp;
-			wc.imm_data = 0;
-			wc.src_qp = qp->remote_qpn;
-			wc.wc_flags = 0;
-			wc.pkey_index = 0;
-			wc.slid = qp->remote_ah_attr.dlid;
-			wc.sl = qp->remote_ah_attr.sl;
-			wc.dlid_path_bits = 0;
-			wc.port_num = 0;
-			ipath_sqerror_qp(qp, &wc);
+			ipath_send_complete(qp, wqe, status);
+			ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			break;
 
 		default:
@@ -1126,8 +1123,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 				     int header_in_data)
 {
 	struct ipath_swqe *wqe;
+	enum ib_wc_status status;
 	unsigned long flags;
-	struct ib_wc wc;
 	int diff;
 	u32 pad;
 	u32 aeth;
@@ -1135,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
+	/* Double check we can process this now that we hold the s_lock. */
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+		goto ack_done;
+
 	/* Ignore invalid responses. */
 	if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
 		goto ack_done;
@@ -1159,6 +1160,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 	if (unlikely(qp->s_last == qp->s_tail))
 		goto ack_done;
 	wqe = get_swqe_ptr(qp, qp->s_last);
+	status = IB_WC_SUCCESS;
 
 	switch (opcode) {
 	case OP(ACKNOWLEDGE):
@@ -1187,6 +1189,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		wqe = get_swqe_ptr(qp, qp->s_last);
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_op_err;
+		qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
 		/*
 		 * If this is a response to a resent RDMA read, we
 		 * have to be careful to copy the data to the right
@@ -1200,7 +1203,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		/* no AETH, no ACK */
 		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
 			dev->n_rdma_seq++;
-			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+			if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+				goto ack_done;
+			qp->r_flags |= IPATH_R_RDMAR_SEQ;
+			ipath_restart_rc(qp, qp->s_last_psn + 1);
 			goto ack_done;
 		}
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
@@ -1261,7 +1267,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		/* ACKs READ req. */
 		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
 			dev->n_rdma_seq++;
-			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+			if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+				goto ack_done;
+			qp->r_flags |= IPATH_R_RDMAR_SEQ;
+			ipath_restart_rc(qp, qp->s_last_psn + 1);
 			goto ack_done;
 		}
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
@@ -1291,31 +1300,16 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		goto ack_done;
 	}
 
-ack_done:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	goto bail;
-
 ack_op_err:
-	wc.status = IB_WC_LOC_QP_OP_ERR;
+	status = IB_WC_LOC_QP_OP_ERR;
 	goto ack_err;
 
 ack_len_err:
-	wc.status = IB_WC_LOC_LEN_ERR;
+	status = IB_WC_LOC_LEN_ERR;
 ack_err:
-	wc.wr_id = wqe->wr.wr_id;
-	wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-	wc.vendor_err = 0;
-	wc.byte_len = 0;
-	wc.imm_data = 0;
-	wc.qp = &qp->ibqp;
-	wc.src_qp = qp->remote_qpn;
-	wc.wc_flags = 0;
-	wc.pkey_index = 0;
-	wc.slid = qp->remote_ah_attr.dlid;
-	wc.sl = qp->remote_ah_attr.sl;
-	wc.dlid_path_bits = 0;
-	wc.port_num = 0;
-	ipath_sqerror_qp(qp, &wc);
+	ipath_send_complete(qp, wqe, status);
+	ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ack_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 bail:
 	return;
@@ -1384,7 +1378,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
 	psn &= IPATH_PSN_MASK;
 	e = NULL;
 	old_req = 1;
+
 	spin_lock_irqsave(&qp->s_lock, flags);
+	/* Double check we can process this now that we hold the s_lock. */
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+		goto unlock_done;
+
 	for (i = qp->r_head_ack_queue; ; i = prev) {
 		if (i == qp->s_tail_ack_queue)
 			old_req = 0;
@@ -1512,7 +1511,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
 		break;
 	}
 	qp->r_nak_state = 0;
-	tasklet_hi_schedule(&qp->s_task);
+	ipath_schedule_send(qp);
 
 unlock_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1523,13 +1522,12 @@ send_ack:
 	return 0;
 }
 
-static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
 {
 	unsigned long flags;
 	int lastwqe;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	qp->state = IB_QPS_ERR;
 	lastwqe = ipath_error_qp(qp, err);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
@@ -1545,18 +1543,15 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
 
 static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
 {
-	unsigned long flags;
 	unsigned next;
 
 	next = n + 1;
 	if (next > IPATH_MAX_RDMA_ATOMIC)
 		next = 0;
-	spin_lock_irqsave(&qp->s_lock, flags);
 	if (n == qp->s_tail_ack_queue) {
 		qp->s_tail_ack_queue = next;
 		qp->s_ack_state = OP(ACKNOWLEDGE);
 	}
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /**
@@ -1585,6 +1580,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	int diff;
 	struct ib_reth *reth;
 	int header_in_data;
+	unsigned long flags;
 
 	/* Validate the SLID. See Ch. 9.6.1.5 */
 	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
@@ -1643,11 +1639,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		    opcode == OP(SEND_LAST) ||
 		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
 			break;
-	nack_inv:
-		ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
-		qp->r_nak_state = IB_NAK_INVALID_REQUEST;
-		qp->r_ack_psn = qp->r_psn;
-		goto send_ack;
+		goto nack_inv;
 
 	case OP(RDMA_WRITE_FIRST):
 	case OP(RDMA_WRITE_MIDDLE):
@@ -1673,18 +1665,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		break;
 	}
 
-	wc.imm_data = 0;
-	wc.wc_flags = 0;
+	memset(&wc, 0, sizeof wc);
 
 	/* OK, process the packet. */
 	switch (opcode) {
 	case OP(SEND_FIRST):
-		if (!ipath_get_rwqe(qp, 0)) {
-		rnr_nak:
-			qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
-			qp->r_ack_psn = qp->r_psn;
-			goto send_ack;
-		}
+		if (!ipath_get_rwqe(qp, 0))
+			goto rnr_nak;
 		qp->r_rcv_len = 0;
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
@@ -1741,9 +1728,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			goto nack_inv;
 		ipath_copy_sge(&qp->r_sge, data, tlen);
 		qp->r_msn++;
-		if (!qp->r_wrid_valid)
+		if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
 			break;
-		qp->r_wrid_valid = 0;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
 		if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
@@ -1751,14 +1737,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
 		else
 			wc.opcode = IB_WC_RECV;
-		wc.vendor_err = 0;
 		wc.qp = &qp->ibqp;
 		wc.src_qp = qp->remote_qpn;
-		wc.pkey_index = 0;
 		wc.slid = qp->remote_ah_attr.dlid;
 		wc.sl = qp->remote_ah_attr.sl;
-		wc.dlid_path_bits = 0;
-		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
 		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
 			       (ohdr->bth[0] &
@@ -1819,9 +1801,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		next = qp->r_head_ack_queue + 1;
 		if (next > IPATH_MAX_RDMA_ATOMIC)
 			next = 0;
+		spin_lock_irqsave(&qp->s_lock, flags);
+		/* Double check we can process this while holding the s_lock. */
+		if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+			goto unlock;
 		if (unlikely(next == qp->s_tail_ack_queue)) {
 			if (!qp->s_ack_queue[next].sent)
-				goto nack_inv;
+				goto nack_inv_unlck;
 			ipath_update_ack_queue(qp, next);
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
@@ -1842,7 +1828,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
 					   rkey, IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
-				goto nack_acc;
+				goto nack_acc_unlck;
 			/*
 			 * Update the next expected PSN.  We add 1 later
 			 * below, so only add the remainder here.
@@ -1869,13 +1855,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		qp->r_psn++;
 		qp->r_state = opcode;
 		qp->r_nak_state = 0;
-		barrier();
 		qp->r_head_ack_queue = next;
 
-		/* Call ipath_do_rc_send() in another thread. */
-		tasklet_hi_schedule(&qp->s_task);
+		/* Schedule the send tasklet. */
+		ipath_schedule_send(qp);
 
-		goto done;
+		goto unlock;
 	}
 
 	case OP(COMPARE_SWAP):
@@ -1894,9 +1879,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		next = qp->r_head_ack_queue + 1;
 		if (next > IPATH_MAX_RDMA_ATOMIC)
 			next = 0;
+		spin_lock_irqsave(&qp->s_lock, flags);
+		/* Double check we can process this while holding the s_lock. */
+		if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+			goto unlock;
 		if (unlikely(next == qp->s_tail_ack_queue)) {
 			if (!qp->s_ack_queue[next].sent)
-				goto nack_inv;
+				goto nack_inv_unlck;
 			ipath_update_ack_queue(qp, next);
 		}
 		if (!header_in_data)
@@ -1906,13 +1895,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
 			be32_to_cpu(ateth->vaddr[1]);
 		if (unlikely(vaddr & (sizeof(u64) - 1)))
-			goto nack_inv;
+			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
 		/* Check rkey & NAK */
 		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
 					    sizeof(u64), vaddr, rkey,
 					    IB_ACCESS_REMOTE_ATOMIC)))
-			goto nack_acc;
+			goto nack_acc_unlck;
 		/* Perform atomic OP and save result. */
 		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
 		sdata = be64_to_cpu(ateth->swap_data);
@@ -1929,13 +1918,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		qp->r_psn++;
 		qp->r_state = opcode;
 		qp->r_nak_state = 0;
-		barrier();
 		qp->r_head_ack_queue = next;
 
-		/* Call ipath_do_rc_send() in another thread. */
-		tasklet_hi_schedule(&qp->s_task);
+		/* Schedule the send tasklet. */
+		ipath_schedule_send(qp);
 
-		goto done;
+		goto unlock;
 	}
 
 	default:
@@ -1951,14 +1939,31 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		goto send_ack;
 	goto done;
 
+rnr_nak:
+	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+	qp->r_ack_psn = qp->r_psn;
+	goto send_ack;
+
+nack_inv_unlck:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+	ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+	qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+	qp->r_ack_psn = qp->r_psn;
+	goto send_ack;
+
+nack_acc_unlck:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 nack_acc:
-	ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
+	ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
 	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
 	qp->r_ack_psn = qp->r_psn;
-
 send_ack:
 	send_rc_ack(qp);
+	goto done;
 
+unlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 done:
 	return;
 }

+ 192 - 137
drivers/infiniband/hw/ipath/ipath_ruc.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = {
  * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
  * @qp: the QP
  *
+ * Called with the QP s_lock held and interrupts disabled.
  * XXX Use a simple list for now.  We might need a priority
  * queue if we have lots of QPs waiting for RNR timeouts
  * but that should be rare.
@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = {
 void ipath_insert_rnr_queue(struct ipath_qp *qp)
 {
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	unsigned long flags;
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	/* We already did a spin_lock_irqsave(), so just use spin_lock */
+	spin_lock(&dev->pending_lock);
 	if (list_empty(&dev->rnrwait))
 		list_add(&qp->timerwait, &dev->rnrwait);
 	else {
@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
 			nqp->s_rnr_timeout -= qp->s_rnr_timeout;
 		list_add(&qp->timerwait, l);
 	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	spin_unlock(&dev->pending_lock);
 }
 
 /**
@@ -140,20 +141,11 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
 	goto bail;
 
 bad_lkey:
+	memset(&wc, 0, sizeof(wc));
 	wc.wr_id = wqe->wr_id;
 	wc.status = IB_WC_LOC_PROT_ERR;
 	wc.opcode = IB_WC_RECV;
-	wc.vendor_err = 0;
-	wc.byte_len = 0;
-	wc.imm_data = 0;
 	wc.qp = &qp->ibqp;
-	wc.src_qp = 0;
-	wc.wc_flags = 0;
-	wc.pkey_index = 0;
-	wc.slid = 0;
-	wc.sl = 0;
-	wc.dlid_path_bits = 0;
-	wc.port_num = 0;
 	/* Signal solicited completion event. */
 	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
 	ret = 0;
@@ -194,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 	}
 
 	spin_lock_irqsave(&rq->lock, flags);
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+		ret = 0;
+		goto unlock;
+	}
+
 	wq = rq->wq;
 	tail = wq->tail;
 	/* Validate tail before using it since it is user writable. */
@@ -201,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 		tail = 0;
 	do {
 		if (unlikely(tail == wq->head)) {
-			spin_unlock_irqrestore(&rq->lock, flags);
 			ret = 0;
-			goto bail;
+			goto unlock;
 		}
 		/* Make sure entry is read after head index is read. */
 		smp_rmb();
@@ -216,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 	wq->tail = tail;
 
 	ret = 1;
-	qp->r_wrid_valid = 1;
+	set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
 	if (handler) {
 		u32 n;
 
@@ -243,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 			goto bail;
 		}
 	}
+unlock:
 	spin_unlock_irqrestore(&rq->lock, flags);
-
 bail:
 	return ret;
 }
@@ -270,38 +266,63 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
 	struct ib_wc wc;
 	u64 sdata;
 	atomic64_t *maddr;
+	enum ib_wc_status send_status;
 
+	/*
+	 * Note that we check the responder QP state after
+	 * checking the requester's state.
+	 */
 	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
-	if (!qp) {
-		dev->n_pkt_drops++;
-		return;
-	}
 
-again:
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
-	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
-	    sqp->s_rnr_timeout) {
-		spin_unlock_irqrestore(&sqp->s_lock, flags);
-		goto done;
-	}
+	/* Return if we are already busy processing a work request. */
+	if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+	    !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+		goto unlock;
 
-	/* Get the next send request. */
-	if (sqp->s_last == sqp->s_head) {
-		/* Send work queue is empty. */
-		spin_unlock_irqrestore(&sqp->s_lock, flags);
-		goto done;
+	sqp->s_flags |= IPATH_S_BUSY;
+
+again:
+	if (sqp->s_last == sqp->s_head)
+		goto clr_busy;
+	wqe = get_swqe_ptr(sqp, sqp->s_last);
+
+	/* Return if it is not OK to start a new work reqeust. */
+	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
+			goto clr_busy;
+		/* We are in the error state, flush the work request. */
+		send_status = IB_WC_WR_FLUSH_ERR;
+		goto flush_send;
 	}
 
 	/*
 	 * We can rely on the entry not changing without the s_lock
 	 * being held until we update s_last.
+	 * We increment s_cur to indicate s_last is in progress.
 	 */
-	wqe = get_swqe_ptr(sqp, sqp->s_last);
+	if (sqp->s_last == sqp->s_cur) {
+		if (++sqp->s_cur >= sqp->s_size)
+			sqp->s_cur = 0;
+	}
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 
-	wc.wc_flags = 0;
-	wc.imm_data = 0;
+	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+		dev->n_pkt_drops++;
+		/*
+		 * For RC, the requester would timeout and retry so
+		 * shortcut the timeouts and just signal too many retries.
+		 */
+		if (sqp->ibqp.qp_type == IB_QPT_RC)
+			send_status = IB_WC_RETRY_EXC_ERR;
+		else
+			send_status = IB_WC_SUCCESS;
+		goto serr;
+	}
+
+	memset(&wc, 0, sizeof wc);
+	send_status = IB_WC_SUCCESS;
 
 	sqp->s_sge.sge = wqe->sg_list[0];
 	sqp->s_sge.sg_list = wqe->sg_list + 1;
@@ -313,75 +334,33 @@ again:
 		wc.imm_data = wqe->wr.ex.imm_data;
 		/* FALLTHROUGH */
 	case IB_WR_SEND:
-		if (!ipath_get_rwqe(qp, 0)) {
-		rnr_nak:
-			/* Handle RNR NAK */
-			if (qp->ibqp.qp_type == IB_QPT_UC)
-				goto send_comp;
-			if (sqp->s_rnr_retry == 0) {
-				wc.status = IB_WC_RNR_RETRY_EXC_ERR;
-				goto err;
-			}
-			if (sqp->s_rnr_retry_cnt < 7)
-				sqp->s_rnr_retry--;
-			dev->n_rnr_naks++;
-			sqp->s_rnr_timeout =
-				ib_ipath_rnr_table[qp->r_min_rnr_timer];
-			ipath_insert_rnr_queue(sqp);
-			goto done;
-		}
+		if (!ipath_get_rwqe(qp, 0))
+			goto rnr_nak;
 		break;
 
 	case IB_WR_RDMA_WRITE_WITH_IMM:
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_WRITE))) {
-			wc.status = IB_WC_REM_INV_REQ_ERR;
-			goto err;
-		}
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+			goto inv_err;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		wc.imm_data = wqe->wr.ex.imm_data;
 		if (!ipath_get_rwqe(qp, 1))
 			goto rnr_nak;
 		/* FALLTHROUGH */
 	case IB_WR_RDMA_WRITE:
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_WRITE))) {
-			wc.status = IB_WC_REM_INV_REQ_ERR;
-			goto err;
-		}
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+			goto inv_err;
 		if (wqe->length == 0)
 			break;
 		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
-					    IB_ACCESS_REMOTE_WRITE))) {
-		acc_err:
-			wc.status = IB_WC_REM_ACCESS_ERR;
-		err:
-			wc.wr_id = wqe->wr.wr_id;
-			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-			wc.vendor_err = 0;
-			wc.byte_len = 0;
-			wc.qp = &sqp->ibqp;
-			wc.src_qp = sqp->remote_qpn;
-			wc.pkey_index = 0;
-			wc.slid = sqp->remote_ah_attr.dlid;
-			wc.sl = sqp->remote_ah_attr.sl;
-			wc.dlid_path_bits = 0;
-			wc.port_num = 0;
-			spin_lock_irqsave(&sqp->s_lock, flags);
-			ipath_sqerror_qp(sqp, &wc);
-			spin_unlock_irqrestore(&sqp->s_lock, flags);
-			goto done;
-		}
+					    IB_ACCESS_REMOTE_WRITE)))
+			goto acc_err;
 		break;
 
 	case IB_WR_RDMA_READ:
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_READ))) {
-			wc.status = IB_WC_REM_INV_REQ_ERR;
-			goto err;
-		}
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+			goto inv_err;
 		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
@@ -394,11 +373,8 @@ again:
 
 	case IB_WR_ATOMIC_CMP_AND_SWP:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_ATOMIC))) {
-			wc.status = IB_WC_REM_INV_REQ_ERR;
-			goto err;
-		}
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+			goto inv_err;
 		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
 					    wqe->wr.wr.atomic.remote_addr,
 					    wqe->wr.wr.atomic.rkey,
@@ -415,7 +391,8 @@ again:
 		goto send_comp;
 
 	default:
-		goto done;
+		send_status = IB_WC_LOC_QP_OP_ERR;
+		goto serr;
 	}
 
 	sge = &sqp->s_sge.sge;
@@ -448,8 +425,7 @@ again:
 		sqp->s_len -= len;
 	}
 
-	if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
-	    wqe->wr.opcode == IB_WR_RDMA_READ)
+	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
 		goto send_comp;
 
 	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -458,33 +434,89 @@ again:
 		wc.opcode = IB_WC_RECV;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
-	wc.vendor_err = 0;
 	wc.byte_len = wqe->length;
 	wc.qp = &qp->ibqp;
 	wc.src_qp = qp->remote_qpn;
-	wc.pkey_index = 0;
 	wc.slid = qp->remote_ah_attr.dlid;
 	wc.sl = qp->remote_ah_attr.sl;
-	wc.dlid_path_bits = 0;
 	wc.port_num = 1;
 	/* Signal completion event if the solicited bit is set. */
 	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
 		       wqe->wr.send_flags & IB_SEND_SOLICITED);
 
 send_comp:
+	spin_lock_irqsave(&sqp->s_lock, flags);
+flush_send:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-	ipath_send_complete(sqp, wqe, IB_WC_SUCCESS);
+	ipath_send_complete(sqp, wqe, send_status);
 	goto again;
 
+rnr_nak:
+	/* Handle RNR NAK */
+	if (qp->ibqp.qp_type == IB_QPT_UC)
+		goto send_comp;
+	/*
+	 * Note: we don't need the s_lock held since the BUSY flag
+	 * makes this single threaded.
+	 */
+	if (sqp->s_rnr_retry == 0) {
+		send_status = IB_WC_RNR_RETRY_EXC_ERR;
+		goto serr;
+	}
+	if (sqp->s_rnr_retry_cnt < 7)
+		sqp->s_rnr_retry--;
+	spin_lock_irqsave(&sqp->s_lock, flags);
+	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
+		goto clr_busy;
+	sqp->s_flags |= IPATH_S_WAITING;
+	dev->n_rnr_naks++;
+	sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
+	ipath_insert_rnr_queue(sqp);
+	goto clr_busy;
+
+inv_err:
+	send_status = IB_WC_REM_INV_REQ_ERR;
+	wc.status = IB_WC_LOC_QP_OP_ERR;
+	goto err;
+
+acc_err:
+	send_status = IB_WC_REM_ACCESS_ERR;
+	wc.status = IB_WC_LOC_PROT_ERR;
+err:
+	/* responder goes to error state */
+	ipath_rc_error(qp, wc.status);
+
+serr:
+	spin_lock_irqsave(&sqp->s_lock, flags);
+	ipath_send_complete(sqp, wqe, send_status);
+	if (sqp->ibqp.qp_type == IB_QPT_RC) {
+		int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+		sqp->s_flags &= ~IPATH_S_BUSY;
+		spin_unlock_irqrestore(&sqp->s_lock, flags);
+		if (lastwqe) {
+			struct ib_event ev;
+
+			ev.device = sqp->ibqp.device;
+			ev.element.qp = &sqp->ibqp;
+			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+		}
+		goto done;
+	}
+clr_busy:
+	sqp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+	spin_unlock_irqrestore(&sqp->s_lock, flags);
 done:
-	if (atomic_dec_and_test(&qp->refcount))
+	if (qp && atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
 }
 
 static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
 {
 	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
-		qp->ibqp.qp_type == IB_QPT_SMI) {
+	    qp->ibqp.qp_type == IB_QPT_SMI) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -502,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
  * @dev: the device we ran out of buffers on
  *
  * Called when we run out of PIO buffers.
+ * If we are now in the error state, return zero to flush the
+ * send work request.
  */
-static void ipath_no_bufs_available(struct ipath_qp *qp,
+static int ipath_no_bufs_available(struct ipath_qp *qp,
 				    struct ipath_ibdev *dev)
 {
 	unsigned long flags;
+	int ret = 1;
 
 	/*
 	 * Note that as soon as want_buffer() is called and
 	 * possibly before it returns, ipath_ib_piobufavail()
-	 * could be called.  If we are still in the tasklet function,
-	 * tasklet_hi_schedule() will not call us until the next time
-	 * tasklet_hi_schedule() is called.
-	 * We leave the busy flag set so that another post send doesn't
-	 * try to put the same QP on the piowait list again.
+	 * could be called. Therefore, put QP on the piowait list before
+	 * enabling the PIO avail interrupt.
 	 */
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	list_add_tail(&qp->piowait, &dev->piowait);
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-	want_buffer(dev->dd, qp);
-	dev->n_piowait++;
+	spin_lock_irqsave(&qp->s_lock, flags);
+	if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
+		dev->n_piowait++;
+		qp->s_flags |= IPATH_S_WAITING;
+		qp->s_flags &= ~IPATH_S_BUSY;
+		spin_lock(&dev->pending_lock);
+		if (list_empty(&qp->piowait))
+			list_add_tail(&qp->piowait, &dev->piowait);
+		spin_unlock(&dev->pending_lock);
+	} else
+		ret = 0;
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	if (ret)
+		want_buffer(dev->dd, qp);
+	return ret;
 }
 
 /**
@@ -597,15 +639,13 @@ void ipath_do_send(unsigned long data)
 	struct ipath_qp *qp = (struct ipath_qp *)data;
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	int (*make_req)(struct ipath_qp *qp);
-
-	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
-		goto bail;
+	unsigned long flags;
 
 	if ((qp->ibqp.qp_type == IB_QPT_RC ||
 	     qp->ibqp.qp_type == IB_QPT_UC) &&
 	    qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
 		ipath_ruc_loopback(qp);
-		goto clear;
+		goto bail;
 	}
 
 	if (qp->ibqp.qp_type == IB_QPT_RC)
@@ -615,6 +655,19 @@ void ipath_do_send(unsigned long data)
 	else
 	       make_req = ipath_make_ud_req;
 
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	/* Return if we are already busy processing a work request. */
+	if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+	    !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+		goto bail;
+	}
+
+	qp->s_flags |= IPATH_S_BUSY;
+
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+
 again:
 	/* Check for a constructed packet to be sent. */
 	if (qp->s_hdrwords != 0) {
@@ -624,8 +677,8 @@ again:
 		 */
 		if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
 				     qp->s_cur_sge, qp->s_cur_size)) {
-			ipath_no_bufs_available(qp, dev);
-			goto bail;
+			if (ipath_no_bufs_available(qp, dev))
+				goto bail;
 		}
 		dev->n_unicast_xmit++;
 		/* Record that we sent the packet and s_hdr is empty. */
@@ -634,16 +687,20 @@ again:
 
 	if (make_req(qp))
 		goto again;
-clear:
-	clear_bit(IPATH_S_BUSY, &qp->s_busy);
+
 bail:;
 }
 
+/*
+ * This should be called with s_lock held.
+ */
 void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
 			 enum ib_wc_status status)
 {
-	unsigned long flags;
-	u32 last;
+	u32 old_last, last;
+
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+		return;
 
 	/* See ch. 11.2.4.1 and 10.7.3.1 */
 	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
@@ -651,27 +708,25 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
 	    status != IB_WC_SUCCESS) {
 		struct ib_wc wc;
 
+		memset(&wc, 0, sizeof wc);
 		wc.wr_id = wqe->wr.wr_id;
 		wc.status = status;
 		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		wc.vendor_err = 0;
-		wc.byte_len = wqe->length;
-		wc.imm_data = 0;
 		wc.qp = &qp->ibqp;
-		wc.src_qp = 0;
-		wc.wc_flags = 0;
-		wc.pkey_index = 0;
-		wc.slid = 0;
-		wc.sl = 0;
-		wc.dlid_path_bits = 0;
-		wc.port_num = 0;
-		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+		if (status == IB_WC_SUCCESS)
+			wc.byte_len = wqe->length;
+		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+			       status != IB_WC_SUCCESS);
 	}
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-	last = qp->s_last;
+	old_last = last = qp->s_last;
 	if (++last >= qp->s_size)
 		last = 0;
 	qp->s_last = last;
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	if (qp->s_cur == old_last)
+		qp->s_cur = last;
+	if (qp->s_tail == old_last)
+		qp->s_tail = last;
+	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+		qp->s_draining = 0;
 }

+ 38 - 19
drivers/infiniband/hw/ipath/ipath_uc.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp)
 {
 	struct ipath_other_headers *ohdr;
 	struct ipath_swqe *wqe;
+	unsigned long flags;
 	u32 hwords;
 	u32 bth0;
 	u32 len;
 	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
 	int ret = 0;
 
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+			goto bail;
+		/* We are in the error state, flush the work request. */
+		if (qp->s_last == qp->s_head)
+			goto bail;
+		/* If DMAs are in progress, we can't flush immediately. */
+		if (atomic_read(&qp->s_dma_busy)) {
+			qp->s_flags |= IPATH_S_WAIT_DMA;
+			goto bail;
+		}
+		wqe = get_swqe_ptr(qp, qp->s_last);
+		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done;
+	}
 
 	ohdr = &qp->s_hdr.u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
@@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp)
 	qp->s_wqe = NULL;
 	switch (qp->s_state) {
 	default:
+		if (!(ib_ipath_state_ops[qp->state] &
+		    IPATH_PROCESS_NEXT_SEND_OK))
+			goto bail;
 		/* Check if send work queue is empty. */
 		if (qp->s_cur == qp->s_head)
-			goto done;
+			goto bail;
 		/*
 		 * Start a new request.
 		 */
@@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
 			break;
 
 		default:
-			goto done;
+			goto bail;
 		}
 		break;
 
@@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp)
 	ipath_make_ruc_header(to_idev(qp->ibqp.device),
 			      qp, ohdr, bth0 | (qp->s_state << 24),
 			      qp->s_next_psn++ & IPATH_PSN_MASK);
+done:
 	ret = 1;
+	goto unlock;
 
-done:
+bail:
+	qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
@@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	 */
 	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
 
-	wc.imm_data = 0;
-	wc.wc_flags = 0;
+	memset(&wc, 0, sizeof wc);
 
 	/* Compare the PSN verses the expected PSN. */
 	if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
@@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
 	send_first:
-		if (qp->r_reuse_sge) {
-			qp->r_reuse_sge = 0;
+		if (qp->r_flags & IPATH_R_REUSE_SGE) {
+			qp->r_flags &= ~IPATH_R_REUSE_SGE;
 			qp->r_sge = qp->s_rdma_read_sge;
 		} else if (!ipath_get_rwqe(qp, 0)) {
 			dev->n_pkt_drops++;
@@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	case OP(SEND_MIDDLE):
 		/* Check for invalid length PMTU or posted rwqe len. */
 		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-			qp->r_reuse_sge = 1;
+			qp->r_flags |= IPATH_R_REUSE_SGE;
 			dev->n_pkt_drops++;
 			goto done;
 		}
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len)) {
-			qp->r_reuse_sge = 1;
+			qp->r_flags |= IPATH_R_REUSE_SGE;
 			dev->n_pkt_drops++;
 			goto done;
 		}
@@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		/* Check for invalid length. */
 		/* XXX LAST len should be >= 1 */
 		if (unlikely(tlen < (hdrsize + pad + 4))) {
-			qp->r_reuse_sge = 1;
+			qp->r_flags |= IPATH_R_REUSE_SGE;
 			dev->n_pkt_drops++;
 			goto done;
 		}
@@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		tlen -= (hdrsize + pad + 4);
 		wc.byte_len = tlen + qp->r_rcv_len;
 		if (unlikely(wc.byte_len > qp->r_len)) {
-			qp->r_reuse_sge = 1;
+			qp->r_flags |= IPATH_R_REUSE_SGE;
 			dev->n_pkt_drops++;
 			goto done;
 		}
@@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
 		wc.opcode = IB_WC_RECV;
-		wc.vendor_err = 0;
 		wc.qp = &qp->ibqp;
 		wc.src_qp = qp->remote_qpn;
-		wc.pkey_index = 0;
 		wc.slid = qp->remote_ah_attr.dlid;
 		wc.sl = qp->remote_ah_attr.sl;
-		wc.dlid_path_bits = 0;
-		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
 		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
 			       (ohdr->bth[0] &
@@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			dev->n_pkt_drops++;
 			goto done;
 		}
-		if (qp->r_reuse_sge)
-			qp->r_reuse_sge = 0;
+		if (qp->r_flags & IPATH_R_REUSE_SGE)
+			qp->r_flags &= ~IPATH_R_REUSE_SGE;
 		else if (!ipath_get_rwqe(qp, 1)) {
 			dev->n_pkt_drops++;
 			goto done;

+ 48 - 18
drivers/infiniband/hw/ipath/ipath_ud.c

@@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
 	u32 length;
 
 	qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
-	if (!qp) {
+	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
 		dev->n_pkt_drops++;
-		goto send_comp;
+		goto done;
 	}
 
 	rsge.sg_list = NULL;
@@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
 	 * present on the wire.
 	 */
 	length = swqe->length;
+	memset(&wc, 0, sizeof wc);
 	wc.byte_len = length + sizeof(struct ib_grh);
 
 	if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		wc.wc_flags = IB_WC_WITH_IMM;
 		wc.imm_data = swqe->wr.ex.imm_data;
-	} else {
-		wc.wc_flags = 0;
-		wc.imm_data = 0;
 	}
 
 	/*
@@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
 	}
 	wc.status = IB_WC_SUCCESS;
 	wc.opcode = IB_WC_RECV;
-	wc.vendor_err = 0;
 	wc.qp = &qp->ibqp;
 	wc.src_qp = sqp->ibqp.qp_num;
 	/* XXX do we know which pkey matched? Only needed for GSI. */
@@ -248,8 +245,7 @@ drop:
 	kfree(rsge.sg_list);
 	if (atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
-send_comp:
-	ipath_send_complete(sqp, swqe, IB_WC_SUCCESS);
+done:;
 }
 
 /**
@@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
 	struct ipath_other_headers *ohdr;
 	struct ib_ah_attr *ah_attr;
 	struct ipath_swqe *wqe;
+	unsigned long flags;
 	u32 nwords;
 	u32 extra_bytes;
 	u32 bth0;
@@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp)
 	u16 lid;
 	int ret = 0;
 
-	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)))
-		goto bail;
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+			goto bail;
+		/* We are in the error state, flush the work request. */
+		if (qp->s_last == qp->s_head)
+			goto bail;
+		/* If DMAs are in progress, we can't flush immediately. */
+		if (atomic_read(&qp->s_dma_busy)) {
+			qp->s_flags |= IPATH_S_WAIT_DMA;
+			goto bail;
+		}
+		wqe = get_swqe_ptr(qp, qp->s_last);
+		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+		goto done;
+	}
 
 	if (qp->s_cur == qp->s_head)
 		goto bail;
 
 	wqe = get_swqe_ptr(qp, qp->s_cur);
+	if (++qp->s_cur >= qp->s_size)
+		qp->s_cur = 0;
 
 	/* Construct the header. */
 	ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
@@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp)
 			dev->n_unicast_xmit++;
 	} else {
 		dev->n_unicast_xmit++;
-		lid = ah_attr->dlid &
-			~((1 << dev->dd->ipath_lmc) - 1);
+		lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
 		if (unlikely(lid == dev->dd->ipath_lid)) {
+			/*
+			 * If DMAs are in progress, we can't generate
+			 * a completion for the loopback packet since
+			 * it would be out of order.
+			 * XXX Instead of waiting, we could queue a
+			 * zero length descriptor so we get a callback.
+			 */
+			if (atomic_read(&qp->s_dma_busy)) {
+				qp->s_flags |= IPATH_S_WAIT_DMA;
+				goto bail;
+			}
+			spin_unlock_irqrestore(&qp->s_lock, flags);
 			ipath_ud_loopback(qp, wqe);
+			spin_lock_irqsave(&qp->s_lock, flags);
+			ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
 			goto done;
 		}
 	}
@@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp)
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 
 done:
-	if (++qp->s_cur >= qp->s_size)
-		qp->s_cur = 0;
 	ret = 1;
+	goto unlock;
 
 bail:
+	qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
@@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_reuse_sge)
-		qp->r_reuse_sge = 0;
+	if (qp->r_flags & IPATH_R_REUSE_SGE)
+		qp->r_flags &= ~IPATH_R_REUSE_SGE;
 	else if (!ipath_get_rwqe(qp, 0)) {
 		/*
 		 * Count VL15 packets dropped due to no receive buffer.
@@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	}
 	/* Silently drop packets which are too big. */
 	if (wc.byte_len > qp->r_len) {
-		qp->r_reuse_sge = 1;
+		qp->r_flags |= IPATH_R_REUSE_SGE;
 		dev->n_pkt_drops++;
 		goto bail;
 	}
@@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
 	ipath_copy_sge(&qp->r_sge, data,
 		       wc.byte_len - sizeof(struct ib_grh));
-	qp->r_wrid_valid = 0;
+	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
+		goto bail;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
 	wc.opcode = IB_WC_RECV;

+ 0 - 2
drivers/infiniband/hw/ipath/ipath_user_sdma.h

@@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
 int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
 				  struct ipath_user_sdma_queue *pq);
 
-int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq,
-			     u32 counter);
 void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
 				 struct ipath_user_sdma_queue *pq);
 

+ 117 - 59
drivers/infiniband/hw/ipath/ipath_verbs.c

@@ -111,16 +111,24 @@ static unsigned int ib_ipath_disable_sma;
 module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; ipath_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = 0,
 	[IB_QPS_INIT] = IPATH_POST_RECV_OK,
 	[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
 	[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
+	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
+	    IPATH_PROCESS_NEXT_SEND_OK,
 	[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-	    IPATH_POST_SEND_OK,
-	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
-	[IB_QPS_ERR] = 0,
+	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
+	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
+	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
+	[IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
+	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
 };
 
 struct ipath_ucontext {
@@ -230,18 +238,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
 	}
 }
 
-static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
-	struct ib_wc wc;
-
-	memset(&wc, 0, sizeof(wc));
-	wc.wr_id = wr->wr_id;
-	wc.status = IB_WC_WR_FLUSH_ERR;
-	wc.opcode = ib_ipath_wc_opcode[wr->opcode];
-	wc.qp = &qp->ibqp;
-	ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
-}
-
 /*
  * Count the number of DMA descriptors needed to send length bytes of data.
  * Don't modify the ipath_sge_state to get the count.
@@ -347,14 +343,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 	spin_lock_irqsave(&qp->s_lock, flags);
 
 	/* Check that state is OK to post send. */
-	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) {
-		if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR)
-			goto bail_inval;
-		/* C10-96 says generate a flushed completion entry. */
-		ipath_flush_wqe(qp, wr);
-		ret = 0;
-		goto bail;
-	}
+	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
+		goto bail_inval;
 
 	/* IB spec says that num_sge == 0 is OK. */
 	if (wr->num_sge > qp->s_max_sge)
@@ -677,6 +667,7 @@ bail:;
 static void ipath_ib_timer(struct ipath_ibdev *dev)
 {
 	struct ipath_qp *resend = NULL;
+	struct ipath_qp *rnr = NULL;
 	struct list_head *last;
 	struct ipath_qp *qp;
 	unsigned long flags;
@@ -703,7 +694,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
 		if (--qp->s_rnr_timeout == 0) {
 			do {
 				list_del_init(&qp->timerwait);
-				tasklet_hi_schedule(&qp->s_task);
+				qp->timer_next = rnr;
+				rnr = qp;
+				atomic_inc(&qp->refcount);
 				if (list_empty(last))
 					break;
 				qp = list_entry(last->next, struct ipath_qp,
@@ -743,16 +736,31 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 
 	/* XXX What if timer fires again while this is running? */
-	for (qp = resend; qp != NULL; qp = qp->timer_next) {
-		struct ib_wc wc;
+	while (resend != NULL) {
+		qp = resend;
+		resend = qp->timer_next;
 
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) {
+		if (qp->s_last != qp->s_tail &&
+		    ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
 			dev->n_timeouts++;
-			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+			ipath_restart_rc(qp, qp->s_last_psn + 1);
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 
+		/* Notify ipath_destroy_qp() if it is waiting. */
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+	while (rnr != NULL) {
+		qp = rnr;
+		rnr = qp->timer_next;
+
+		spin_lock_irqsave(&qp->s_lock, flags);
+		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
+			ipath_schedule_send(qp);
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+
 		/* Notify ipath_destroy_qp() if it is waiting. */
 		if (atomic_dec_and_test(&qp->refcount))
 			wake_up(&qp->wait);
@@ -1012,13 +1020,24 @@ static void sdma_complete(void *cookie, int status)
 	struct ipath_verbs_txreq *tx = cookie;
 	struct ipath_qp *qp = tx->qp;
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	unsigned int flags;
+	enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
+		IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
 
-	/* Generate a completion queue entry if needed */
-	if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) {
-		enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
-			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
-
+	if (atomic_dec_and_test(&qp->s_dma_busy)) {
+		spin_lock_irqsave(&qp->s_lock, flags);
+		if (tx->wqe)
+			ipath_send_complete(qp, tx->wqe, ibs);
+		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
+		     qp->s_last != qp->s_head) ||
+		    (qp->s_flags & IPATH_S_WAIT_DMA))
+			ipath_schedule_send(qp);
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+		wake_up(&qp->wait_dma);
+	} else if (tx->wqe) {
+		spin_lock_irqsave(&qp->s_lock, flags);
 		ipath_send_complete(qp, tx->wqe, ibs);
+		spin_unlock_irqrestore(&qp->s_lock, flags);
 	}
 
 	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
@@ -1029,6 +1048,21 @@ static void sdma_complete(void *cookie, int status)
 		wake_up(&qp->wait);
 }
 
+static void decrement_dma_busy(struct ipath_qp *qp)
+{
+	unsigned int flags;
+
+	if (atomic_dec_and_test(&qp->s_dma_busy)) {
+		spin_lock_irqsave(&qp->s_lock, flags);
+		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
+		     qp->s_last != qp->s_head) ||
+		    (qp->s_flags & IPATH_S_WAIT_DMA))
+			ipath_schedule_send(qp);
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+		wake_up(&qp->wait_dma);
+	}
+}
+
 /*
  * Compute the number of clock cycles of delay before sending the next packet.
  * The multipliers reflect the number of clocks for the fastest rate so
@@ -1067,9 +1101,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
 	if (tx) {
 		qp->s_tx = NULL;
 		/* resend previously constructed packet */
+		atomic_inc(&qp->s_dma_busy);
 		ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
-		if (ret)
+		if (ret) {
 			qp->s_tx = tx;
+			decrement_dma_busy(qp);
+		}
 		goto bail;
 	}
 
@@ -1120,12 +1157,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
 		tx->txreq.sg_count = ndesc;
 		tx->map_len = (hdrwords + 2) << 2;
 		tx->txreq.map_addr = &tx->hdr;
+		atomic_inc(&qp->s_dma_busy);
 		ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
 		if (ret) {
 			/* save ss and length in dwords */
 			tx->ss = ss;
 			tx->len = dwords;
 			qp->s_tx = tx;
+			decrement_dma_busy(qp);
 		}
 		goto bail;
 	}
@@ -1146,6 +1185,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
 	memcpy(piobuf, hdr, hdrwords << 2);
 	ipath_copy_from_sge(piobuf + hdrwords, ss, len);
 
+	atomic_inc(&qp->s_dma_busy);
 	ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
 	/*
 	 * If we couldn't queue the DMA request, save the info
@@ -1156,6 +1196,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
 		tx->ss = NULL;
 		tx->len = 0;
 		qp->s_tx = tx;
+		decrement_dma_busy(qp);
 	}
 	dev->n_unaligned++;
 	goto bail;
@@ -1179,6 +1220,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
 	unsigned flush_wc;
 	u32 control;
 	int ret;
+	unsigned int flags;
 
 	piobuf = ipath_getpiobuf(dd, plen, NULL);
 	if (unlikely(piobuf == NULL)) {
@@ -1249,8 +1291,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
 	}
 	copy_io(piobuf, ss, len, flush_wc);
 done:
-	if (qp->s_wqe)
+	if (qp->s_wqe) {
+		spin_lock_irqsave(&qp->s_lock, flags);
 		ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+	}
 	ret = 0;
 bail:
 	return ret;
@@ -1283,19 +1328,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
 	 * can defer SDMA restart until link goes ACTIVE without
 	 * worrying about just how we got there.
 	 */
-	if (qp->ibqp.qp_type == IB_QPT_SMI)
+	if (qp->ibqp.qp_type == IB_QPT_SMI ||
+	    !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
 		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
 					   plen, dwords);
-	/* All non-VL15 packets are dropped if link is not ACTIVE */
-	else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) {
-		if (qp->s_wqe)
-			ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
-		ret = 0;
-	} else if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
-					   plen, dwords);
 	else
-		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+		ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
 					   plen, dwords);
 
 	return ret;
@@ -1403,27 +1441,46 @@ bail:
  * This is called from ipath_intr() at interrupt level when a PIO buffer is
  * available after ipath_verbs_send() returned an error that no buffers were
  * available.  Return 1 if we consumed all the PIO buffers and we still have
- * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
+ * QPs waiting for buffers (for now, just restart the send tasklet and
  * return zero).
  */
 int ipath_ib_piobufavail(struct ipath_ibdev *dev)
 {
+	struct list_head *list;
+	struct ipath_qp *qplist;
 	struct ipath_qp *qp;
 	unsigned long flags;
 
 	if (dev == NULL)
 		goto bail;
 
+	list = &dev->piowait;
+	qplist = NULL;
+
 	spin_lock_irqsave(&dev->pending_lock, flags);
-	while (!list_empty(&dev->piowait)) {
-		qp = list_entry(dev->piowait.next, struct ipath_qp,
-				piowait);
+	while (!list_empty(list)) {
+		qp = list_entry(list->next, struct ipath_qp, piowait);
 		list_del_init(&qp->piowait);
-		clear_bit(IPATH_S_BUSY, &qp->s_busy);
-		tasklet_hi_schedule(&qp->s_task);
+		qp->pio_next = qplist;
+		qplist = qp;
+		atomic_inc(&qp->refcount);
 	}
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 
+	while (qplist != NULL) {
+		qp = qplist;
+		qplist = qp->pio_next;
+
+		spin_lock_irqsave(&qp->s_lock, flags);
+		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
+			ipath_schedule_send(qp);
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+
+		/* Notify ipath_destroy_qp() if it is waiting. */
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+
 bail:
 	return 0;
 }
@@ -2145,11 +2202,12 @@ bail:
 void ipath_unregister_ib_device(struct ipath_ibdev *dev)
 {
 	struct ib_device *ibdev = &dev->ibdev;
-
-	disable_timer(dev->dd);
+	u32 qps_inuse;
 
 	ib_unregister_device(ibdev);
 
+	disable_timer(dev->dd);
+
 	if (!list_empty(&dev->pending[0]) ||
 	    !list_empty(&dev->pending[1]) ||
 	    !list_empty(&dev->pending[2]))
@@ -2164,7 +2222,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)
 	 * Note that ipath_unregister_ib_device() can be called before all
 	 * the QPs are destroyed!
 	 */
-	ipath_free_all_qps(&dev->qp_table);
+	qps_inuse = ipath_free_all_qps(&dev->qp_table);
+	if (qps_inuse)
+		ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
+			qps_inuse);
 	kfree(dev->qp_table.table);
 	kfree(dev->lk_table.table);
 	kfree(dev->txreq_bufs);
@@ -2215,17 +2276,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr,
 		      "RC OTH NAKs %d\n"
 		      "RC timeouts %d\n"
 		      "RC RDMA dup %d\n"
-		      "RC stalls   %d\n"
 		      "piobuf wait %d\n"
-		      "no piobuf   %d\n"
 		      "unaligned   %d\n"
 		      "PKT drops   %d\n"
 		      "WQE errs    %d\n",
 		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
 		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
 		      dev->n_other_naks, dev->n_timeouts,
-		      dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
-		      dev->n_no_piobuf, dev->n_unaligned,
+		      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
 		      dev->n_pkt_drops, dev->n_wqe_errs);
 	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
 		const struct ipath_opcode_stats *si = &dev->opstats[i];

+ 52 - 12
drivers/infiniband/hw/ipath/ipath_verbs.h

@@ -74,6 +74,11 @@
 #define IPATH_POST_RECV_OK		0x02
 #define IPATH_PROCESS_RECV_OK		0x04
 #define IPATH_PROCESS_SEND_OK		0x08
+#define IPATH_PROCESS_NEXT_SEND_OK	0x10
+#define IPATH_FLUSH_SEND		0x20
+#define IPATH_FLUSH_RECV		0x40
+#define IPATH_PROCESS_OR_FLUSH_SEND \
+	(IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
 
 /* IB Performance Manager status values */
 #define IB_PMA_SAMPLE_STATUS_DONE	0x00
@@ -353,12 +358,14 @@ struct ipath_qp {
 	struct ib_qp ibqp;
 	struct ipath_qp *next;		/* link list for QPN hash table */
 	struct ipath_qp *timer_next;	/* link list for ipath_ib_timer() */
+	struct ipath_qp *pio_next;	/* link for ipath_ib_piobufavail() */
 	struct list_head piowait;	/* link for wait PIO buf */
 	struct list_head timerwait;	/* link for waiting for timeouts */
 	struct ib_ah_attr remote_ah_attr;
 	struct ipath_ib_header s_hdr;	/* next packet header to send */
 	atomic_t refcount;
 	wait_queue_head_t wait;
+	wait_queue_head_t wait_dma;
 	struct tasklet_struct s_task;
 	struct ipath_mmap_info *ip;
 	struct ipath_sge_state *s_cur_sge;
@@ -369,7 +376,7 @@ struct ipath_qp {
 	struct ipath_sge_state s_rdma_read_sge;
 	struct ipath_sge_state r_sge;	/* current receive data */
 	spinlock_t s_lock;
-	unsigned long s_busy;
+	atomic_t s_dma_busy;
 	u16 s_pkt_delay;
 	u16 s_hdrwords;		/* size of s_hdr in 32 bit words */
 	u32 s_cur_size;		/* size of send packet in bytes */
@@ -383,6 +390,7 @@ struct ipath_qp {
 	u32 s_rnr_timeout;	/* number of milliseconds for RNR timeout */
 	u32 r_ack_psn;		/* PSN for next ACK or atomic ACK */
 	u64 r_wr_id;		/* ID for current receive WQE */
+	unsigned long r_aflags;
 	u32 r_len;		/* total length of r_sge */
 	u32 r_rcv_len;		/* receive data len processed */
 	u32 r_psn;		/* expected rcv packet sequence number */
@@ -394,8 +402,7 @@ struct ipath_qp {
 	u8 r_state;		/* opcode of last packet received */
 	u8 r_nak_state;		/* non-zero if NAK is pending */
 	u8 r_min_rnr_timer;	/* retry timeout value for RNR NAKs */
-	u8 r_reuse_sge;		/* for UC receive errors */
-	u8 r_wrid_valid;	/* r_wrid set but CQ entry not yet made */
+	u8 r_flags;
 	u8 r_max_rd_atomic;	/* max number of RDMA read/atomic to receive */
 	u8 r_head_ack_queue;	/* index into s_ack_queue[] */
 	u8 qp_access_flags;
@@ -404,13 +411,13 @@ struct ipath_qp {
 	u8 s_rnr_retry_cnt;
 	u8 s_retry;		/* requester retry counter */
 	u8 s_rnr_retry;		/* requester RNR retry counter */
-	u8 s_wait_credit;	/* limit number of unacked packets sent */
 	u8 s_pkey_index;	/* PKEY index to use */
 	u8 s_max_rd_atomic;	/* max number of RDMA read/atomic to send */
 	u8 s_num_rd_atomic;	/* number of RDMA read/atomic pending */
 	u8 s_tail_ack_queue;	/* index into s_ack_queue[] */
 	u8 s_flags;
 	u8 s_dmult;
+	u8 s_draining;
 	u8 timeout;		/* Timeout for this QP */
 	enum ib_mtu path_mtu;
 	u32 remote_qpn;
@@ -428,16 +435,40 @@ struct ipath_qp {
 	struct ipath_sge r_sg_list[0];	/* verified SGEs */
 };
 
-/* Bit definition for s_busy. */
-#define IPATH_S_BUSY		0
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define IPATH_R_WRID_VALID	0
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define IPATH_R_REUSE_SGE	0x01
+#define IPATH_R_RDMAR_SEQ	0x02
 
 /*
  * Bit definitions for s_flags.
+ *
+ * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
+ *			   before processing the next SWQE
+ * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
+ *			   before processing the next SWQE
+ * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
+ * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ *		      next send completion entry not via send DMA.
  */
 #define IPATH_S_SIGNAL_REQ_WR	0x01
 #define IPATH_S_FENCE_PENDING	0x02
 #define IPATH_S_RDMAR_PENDING	0x04
 #define IPATH_S_ACK_PENDING	0x08
+#define IPATH_S_BUSY		0x10
+#define IPATH_S_WAITING		0x20
+#define IPATH_S_WAIT_SSN_CREDIT	0x40
+#define IPATH_S_WAIT_DMA	0x80
+
+#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
+	IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
 
 #define IPATH_PSN_CREDIT	512
 
@@ -573,13 +604,11 @@ struct ipath_ibdev {
 	u32 n_rnr_naks;
 	u32 n_other_naks;
 	u32 n_timeouts;
-	u32 n_rc_stalls;
 	u32 n_pkt_drops;
 	u32 n_vl15_dropped;
 	u32 n_wqe_errs;
 	u32 n_rdma_dup_busy;
 	u32 n_piowait;
-	u32 n_no_piobuf;
 	u32 n_unaligned;
 	u32 port_cap_flags;
 	u32 pma_sample_start;
@@ -657,6 +686,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
 	return container_of(ibdev, struct ipath_ibdev, ibdev);
 }
 
+/*
+ * This must be called with s_lock held.
+ */
+static inline void ipath_schedule_send(struct ipath_qp *qp)
+{
+	if (qp->s_flags & IPATH_S_ANY_WAIT)
+		qp->s_flags &= ~IPATH_S_ANY_WAIT;
+	if (!(qp->s_flags & IPATH_S_BUSY))
+		tasklet_hi_schedule(&qp->s_task);
+}
+
 int ipath_process_mad(struct ib_device *ibdev,
 		      int mad_flags,
 		      u8 port_num,
@@ -706,12 +746,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		   int attr_mask, struct ib_qp_init_attr *init_attr);
 
-void ipath_free_all_qps(struct ipath_qp_table *qpt);
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
 
 int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
 
-void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
-
 void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
 
 unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
@@ -729,7 +767,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
 
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc);
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
+
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
 
 int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
 

+ 0 - 4
drivers/infiniband/hw/nes/nes.c

@@ -91,10 +91,6 @@ unsigned int nes_debug_level = 0;
 module_param_named(debug_level, nes_debug_level, uint, 0644);
 MODULE_PARM_DESC(debug_level, "Enable debug output level");
 
-unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
-module_param(nes_lro_max_aggr, int, NES_LRO_MAX_AGGR);
-MODULE_PARM_DESC(nes_mro_max_aggr, " nic LRO MAX packet aggregation");
-
 LIST_HEAD(nes_adapter_list);
 static LIST_HEAD(nes_dev_list);
 

+ 0 - 1
drivers/infiniband/hw/nes/nes.h

@@ -173,7 +173,6 @@ extern int disable_mpa_crc;
 extern unsigned int send_first;
 extern unsigned int nes_drv_opt;
 extern unsigned int nes_debug_level;
-extern unsigned int nes_lro_max_aggr;
 
 extern struct list_head nes_adapter_list;
 

+ 5 - 1
drivers/infiniband/hw/nes/nes_hw.c

@@ -42,6 +42,10 @@
 
 #include "nes.h"
 
+static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
+module_param(nes_lro_max_aggr, uint, 0444);
+MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation");
+
 static u32 crit_err_count;
 u32 int_mod_timer_init;
 u32 int_mod_cq_depth_256;
@@ -1738,7 +1742,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
 			jumbomode = 1;
 		nes_nic_init_timer_defaults(nesdev, jumbomode);
 	}
-	nesvnic->lro_mgr.max_aggr       = NES_LRO_MAX_AGGR;
+	nesvnic->lro_mgr.max_aggr       = nes_lro_max_aggr;
 	nesvnic->lro_mgr.max_desc       = NES_MAX_LRO_DESCRIPTORS;
 	nesvnic->lro_mgr.lro_arr        = nesvnic->lro_desc;
 	nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr;